load data

# One prediction-vs-phenotype summary table per validation dataset.
# No `delim` is passed, so read_delim() is left to work out the delimiter.
# The wrangling code further down expects an unnamed first column (referred
# to as `...1`) holding the prediction label, plus count columns S, I and R.

# Datasets with separate Kpn / Kvv summary files
MBIRA_Kpn <- read_delim("MBIRA_ValidationOutput/Prediction_Pheno_summary_Kpn.csv")
MBIRA_Kvv <- read_delim("MBIRA_ValidationOutput/Prediction_Pheno_summary_Kvv.csv")
# Datasets with a single summary file
HowdenGorrie <- read_delim("HowdenGorrie_ValidationOutput/Prediction_Pheno_summary.csv")
AGAR <- read_delim("AGAR_ValidationOutput/Prediction_Pheno_summary.csv")
Milan <- read_delim("MilanSanRafaelle_ValidationOutput/Prediction_Pheno_summary.csv")
HailuEthiopia <- read_delim("HailuEthiopia_ValidationOutput/Prediction_Pheno_summary.csv")
FeaseyMalawi <- read_delim("FeaseyMalawi_ValidationOutput/Prediction_Pheno_summary.csv")
KayamaJapan <- read_delim("KayamaJapan_ValidationOutput/Prediction_Pheno_summary.csv")
EUSCAPE_EURECA <- read_delim("EURECA_EUSCAPE_ValidationOutput/Prediction_Pheno_summary.csv")
EgliSwitzerland <- read_delim("EgliSwitzerland_2025_ValidationOutput/Prediction_Pheno_summary.csv")
HRYC_Spain <- read_delim("HRYC_Spain_ValidationOutput/Prediction_Pheno_summary.csv")
IndiaGHRU <- read_delim("IndiaGHRU_2025_ValidationOutput/Prediction_Pheno_summary.csv")
NeumannGermany <- read_delim("NeumannGermany_ValidationOutput/Prediction_Pheno_summary.csv")
# NOTE(review): this file carries a "_DD" suffix, unlike the others - confirm it is the intended input
Whitelaw_CHAMPS <- read_delim("Whitelaw_Stellenbosch_ValidationOutput/Prediction_Pheno_summary_DD.csv")
Oxfordshire_Kpn <- read_delim("Oxfordshire_ValidationOutput/Prediction_Pheno_summary_Kpn.csv")
Oxfordshire_Kvv <- read_delim("Oxfordshire_ValidationOutput/Prediction_Pheno_summary_Kvv.csv")
IZSAM_Italy <- read_delim("IZSAM_Italy_ValidationOutput/Prediction_Pheno_summary.csv")
LaPaz_Lazaro <- read_delim("LaPaz_Lazaro_ValidationOutput/Prediction_Pheno_summary.csv")
HarrisMERINO <- read_delim("HarrisMERINO_ValidationOutput/Prediction_Pheno_summary.csv")
PachecoBrazil <- read_delim("PachecoBrazil_ValidationOutput/Prediction_Pheno_summary.csv")

set colours

# Named colour vectors, keyed by the category label they are used for.

# Two-level wild-type / non-wild-type palette
wt_colours <- c(
  "non-wt (I/R)" = "IndianRed",
  "wt (S)" = "LightBlue"
)

# S/I/R palette in which I and NWT are both drawn in grey
res_colours <- c(
  "I" = "grey",
  "R" = "IndianRed",
  "S" = "LightBlue",
  "NWT" = "grey"
)

# Same keys as res_colours, but I and NWT are drawn in black
res_colours2 <- c(
  "I" = "black",
  "R" = "IndianRed",
  "S" = "LightBlue",
  "NWT" = "black"
)

# Palette for the combined "NWT I" / "NWT R" / "WT S" labels
res_colours3 <- c(
  "NWT I" = "#78638a",
  "NWT R" = "IndianRed",
  "WT S" = "LightBlue"
)
# Shared position adjustment for the coefficient plots, with a dodge width
# of 0.8 (position_dodge is presumably ggplot2's - confirm it is attached).
pd <- position_dodge(width=0.8)

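functions to calculate categorical agreement, PPV, NPV, sensitivity, specificity, ME and VME rates (each with lower and upper 95% bounds) from 2x2 prediction-vs-phenotype tables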

# Categorical agreement: the share of all counts that sit on the diagonal
# of a 2x2 table (cells [1,1] and [2,2]).
#
# xtabs: 2x2 numeric matrix/table; elsewhere in this file rows are the
#        predicted categories and columns the observed ones.
# Returns a single proportion.
cat_agree <- function(xtabs) {
  n_concordant <- xtabs[1, 1] + xtabs[2, 2]
  n_concordant / sum(xtabs)
}

# Lower bound of the normal-approximation (Wald) 95% interval around the
# categorical agreement: p - 1.96 * sqrt(p * (1 - p) / n), where p is the
# diagonal share of the 2x2 table and n is its grand total.
# The result is not clamped, so it can fall below 0.
cat_agree_lb <- function(xtabs) {
  total <- sum(xtabs)
  agreement <- (xtabs[1, 1] + xtabs[2, 2]) / total
  std_err <- sqrt(agreement * (1 - agreement) / total)
  agreement - (1.96 * std_err)
}

# Upper bound of the normal-approximation (Wald) 95% interval around the
# categorical agreement: p + 1.96 * sqrt(p * (1 - p) / n), where p is the
# diagonal share of the 2x2 table and n is its grand total.
# The result is not clamped, so it can exceed 1.
cat_agree_ub <- function(xtabs) {
  total <- sum(xtabs)
  agreement <- (xtabs[1, 1] + xtabs[2, 2]) / total
  std_err <- sqrt(agreement * (1 - agreement) / total)
  agreement + (1.96 * std_err)
}

# Positive predictive value: cell [2,2] as a fraction of the second row
# total, i.e. how many of the row-2 predictions land in column 2.
# Returns NaN when row 2 is empty.
ppv <- function(xtabs) {
  n_pred_pos <- sum(xtabs[2, ])
  xtabs[2, 2] / n_pred_pos
}

# Lower bound of the normal-approximation (Wald) 95% interval for the
# positive predictive value, using the second row total as sample size.
# Not clamped to [0, 1].
ppv_lb <- function(xtabs) {
  n_pred_pos <- sum(xtabs[2, ])
  estimate <- xtabs[2, 2] / n_pred_pos
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_pred_pos))
  estimate - half_width
}

# Upper bound of the normal-approximation (Wald) 95% interval for the
# positive predictive value, using the second row total as sample size.
# Not clamped to [0, 1].
ppv_ub <- function(xtabs) {
  n_pred_pos <- sum(xtabs[2, ])
  estimate <- xtabs[2, 2] / n_pred_pos
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_pred_pos))
  estimate + half_width
}


# Negative predictive value: cell [1,1] as a fraction of the first row
# total, i.e. how many of the row-1 predictions land in column 1.
# Returns NaN when row 1 is empty.
npv <- function(xtabs) {
  n_pred_neg <- sum(xtabs[1, ])
  xtabs[1, 1] / n_pred_neg
}


# Lower bound of the normal-approximation (Wald) 95% interval for the
# negative predictive value, using the first row total as sample size.
# Not clamped to [0, 1].
npv_lb <- function(xtabs) {
  n_pred_neg <- sum(xtabs[1, ])
  estimate <- xtabs[1, 1] / n_pred_neg
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_pred_neg))
  estimate - half_width
}


# Upper bound of the normal-approximation (Wald) 95% interval for the
# negative predictive value, using the first row total as sample size.
# Not clamped to [0, 1].
npv_ub <- function(xtabs) {
  n_pred_neg <- sum(xtabs[1, ])
  estimate <- xtabs[1, 1] / n_pred_neg
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_pred_neg))
  estimate + half_width
}



#sens <- function(pred,truth) {
#  xtabs <- table(pred,truth)
#  return(xtabs[2,2]/(sum(xtabs[,2])))
#}

# Sensitivity: cell [2,2] as a fraction of the second column total, i.e.
# how many of the column-2 observations were also placed in row 2.
# Returns NaN when column 2 is empty.
sens <- function(xtabs) {
  n_obs_pos <- sum(xtabs[, 2])
  xtabs[2, 2] / n_obs_pos
}

# Lower bound of the normal-approximation (Wald) 95% interval for the
# sensitivity, using the second column total as sample size.
# Not clamped to [0, 1].
sens_lb <- function(xtabs) {
  n_obs_pos <- sum(xtabs[, 2])
  estimate <- xtabs[2, 2] / n_obs_pos
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_pos))
  estimate - half_width
}

# Upper bound of the normal-approximation (Wald) 95% interval for the
# sensitivity, using the second column total as sample size.
# Not clamped to [0, 1].
sens_ub <- function(xtabs) {
  n_obs_pos <- sum(xtabs[, 2])
  estimate <- xtabs[2, 2] / n_obs_pos
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_pos))
  estimate + half_width
}

# Specificity: cell [1,1] as a fraction of the first column total, i.e.
# how many of the column-1 observations were also placed in row 1.
# Returns NaN when column 1 is empty.
spec <- function(xtabs) {
  n_obs_neg <- sum(xtabs[, 1])
  xtabs[1, 1] / n_obs_neg
}

# Lower bound of the normal-approximation (Wald) 95% interval for the
# specificity, using the first column total as sample size.
# Not clamped to [0, 1].
spec_lb <- function(xtabs) {
  n_obs_neg <- sum(xtabs[, 1])
  estimate <- xtabs[1, 1] / n_obs_neg
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_neg))
  estimate - half_width
}

# Upper bound of the normal-approximation (Wald) 95% interval for the
# specificity, using the first column total as sample size.
# Not clamped to [0, 1].
spec_ub <- function(xtabs) {
  n_obs_neg <- sum(xtabs[, 1])
  estimate <- xtabs[1, 1] / n_obs_neg
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_neg))
  estimate + half_width
}

# ME rate (presumably "major error"): cell [2,1] as a fraction of the first
# column total, i.e. column-1 observations that were placed in row 2.
# Returns NaN when column 1 is empty.
me <- function(xtabs) {
  n_obs_neg <- sum(xtabs[, 1])
  xtabs[2, 1] / n_obs_neg
}

# Lower bound of the normal-approximation (Wald) 95% interval for the ME
# rate (cell [2,1] over the first column total).
# Not clamped, so it can be negative for small rates.
me_lb <- function(xtabs) {
  n_obs_neg <- sum(xtabs[, 1])
  estimate <- xtabs[2, 1] / n_obs_neg
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_neg))
  estimate - half_width
}

# Upper bound of the normal-approximation (Wald) 95% interval for the ME
# rate (cell [2,1] over the first column total).
# Not clamped to [0, 1].
me_ub <- function(xtabs) {
  n_obs_neg <- sum(xtabs[, 1])
  estimate <- xtabs[2, 1] / n_obs_neg
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_neg))
  estimate + half_width
}

# VME rate (presumably "very major error"): cell [1,2] as a fraction of the
# second column total, i.e. column-2 observations that were placed in row 1.
# Returns NaN when column 2 is empty.
vme <- function(xtabs) {
  n_obs_pos <- sum(xtabs[, 2])
  xtabs[1, 2] / n_obs_pos
}

# Lower bound of the normal-approximation (Wald) 95% interval for the VME
# rate (cell [1,2] over the second column total).
# Not clamped, so it can be negative for small rates.
vme_lb <- function(xtabs) {
  n_obs_pos <- sum(xtabs[, 2])
  estimate <- xtabs[1, 2] / n_obs_pos
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_pos))
  estimate - half_width
}

# Upper bound of the normal-approximation (Wald) 95% interval for the VME
# rate (cell [1,2] over the second column total).
# Not clamped to [0, 1].
vme_ub <- function(xtabs) {
  n_obs_pos <- sum(xtabs[, 2])
  estimate <- xtabs[1, 2] / n_obs_pos
  half_width <- 1.96 * sqrt(estimate * ((1 - estimate) / n_obs_pos))
  estimate + half_width
}
# Share of all counts that fall in the second column of the table (the
# observed "R" or "I_R" column in the tables built in this file).
# Despite the name, the value is a proportion in [0, 1], not a percentage.
perc_R <- function(xtabs) {
  n_second_col <- sum(xtabs[, 2])
  n_second_col / sum(xtabs)
}

# Bundle every performance statistic for one 2x2 table.
#
# xtabs: 2x2 numeric matrix/table, passed unchanged to each helper.
# Returns a list with one scalar per statistic (estimate, `_lb` and `_ub`
# for cat, ppv, npv, sens, spec, me, vme), plus `perc_R`, the grand total
# `n`, and `summary`: a named numeric vector repeating a subset of those
# values (ppv/npv are not part of it) in a fixed order.
metrics <- function(xtabs) {
  stats <- list(
    cat = cat_agree(xtabs),
    cat_lb = cat_agree_lb(xtabs),
    cat_ub = cat_agree_ub(xtabs),
    ppv = ppv(xtabs),
    ppv_lb = ppv_lb(xtabs),
    ppv_ub = ppv_ub(xtabs),
    npv = npv(xtabs),
    npv_lb = npv_lb(xtabs),
    npv_ub = npv_ub(xtabs),
    sens = sens(xtabs),
    sens_lb = sens_lb(xtabs),
    sens_ub = sens_ub(xtabs),
    spec = spec(xtabs),
    spec_lb = spec_lb(xtabs),
    spec_ub = spec_ub(xtabs),
    me = me(xtabs),
    me_lb = me_lb(xtabs),
    me_ub = me_ub(xtabs),
    vme = vme(xtabs),
    vme_lb = vme_lb(xtabs),
    vme_ub = vme_ub(xtabs),
    perc_R = perc_R(xtabs),
    n = sum(xtabs)
  )

  # Estimates first, then the bounds, then prevalence and sample size.
  summary_fields <- c(
    "cat", "sens", "spec", "me", "vme",
    "cat_lb", "cat_ub",
    "sens_lb", "sens_ub",
    "spec_lb", "spec_ub",
    "me_lb", "me_ub",
    "vme_lb", "vme_ub",
    "perc_R", "n"
  )
  stats$summary <- unlist(stats[summary_fields])

  stats
}

data wrangling

# MBIRA_Kpn: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
MBIRA_Kpn_SI <- MBIRA_Kpn %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
MBIRA_Kpn_R <- MBIRA_Kpn %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
MBIRA_Kpn_SIR <- distinct(rbind(MBIRA_Kpn_SI, MBIRA_Kpn_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(MBIRA_Kpn_SIR, Group == "pred_S_I")$S_I
## [1] 69
table(MBIRA_Kpn_SIR$S_I)
##
## 12 57 58 69 70
##  1  1  1  1  1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
MBIRA_Kpn_R_prediction <- matrix(
  c(
    with(subset(MBIRA_Kpn_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(MBIRA_Kpn_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
MBIRA_Kpn_R_prediction_summary <- metrics(MBIRA_Kpn_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
MBIRA_Kpn_NWT_prediction <- matrix(
  c(
    with(subset(MBIRA_Kpn_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(MBIRA_Kpn_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
MBIRA_Kpn_NWT_prediction_summary <- metrics(MBIRA_Kpn_NWT_prediction)
# MBIRA_Kvv: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
MBIRA_Kvv_SI <- MBIRA_Kvv %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
MBIRA_Kvv_R <- MBIRA_Kvv %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
MBIRA_Kvv_SIR <- distinct(rbind(MBIRA_Kvv_SI, MBIRA_Kvv_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(MBIRA_Kvv_SIR, Group == "pred_S_I")$S_I
## [1] 86
table(MBIRA_Kvv_SIR$S_I)
##
##  0 86
##  3  2

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
MBIRA_Kvv_R_prediction <- matrix(
  c(
    with(subset(MBIRA_Kvv_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(MBIRA_Kvv_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
MBIRA_Kvv_R_prediction_summary <- metrics(MBIRA_Kvv_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
MBIRA_Kvv_NWT_prediction <- matrix(
  c(
    with(subset(MBIRA_Kvv_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(MBIRA_Kvv_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
MBIRA_Kvv_NWT_prediction_summary <- metrics(MBIRA_Kvv_NWT_prediction)
# HowdenGorrie: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
HowdenGorrie_SI <- HowdenGorrie %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
HowdenGorrie_R <- HowdenGorrie %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
HowdenGorrie_SIR <- distinct(rbind(HowdenGorrie_SI, HowdenGorrie_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(HowdenGorrie_SIR, Group == "pred_S_I")$S_I
## [1] 44
table(HowdenGorrie_SIR$S_I)
##
##  3 15 18 41 44
##  1  1  1  1  1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
HowdenGorrie_R_prediction <- matrix(
  c(
    with(subset(HowdenGorrie_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(HowdenGorrie_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
HowdenGorrie_R_prediction_summary <- metrics(HowdenGorrie_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
HowdenGorrie_NWT_prediction <- matrix(
  c(
    with(subset(HowdenGorrie_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(HowdenGorrie_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
HowdenGorrie_NWT_prediction_summary <- metrics(HowdenGorrie_NWT_prediction)
# AGAR: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
AGAR_SI <- AGAR %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
AGAR_R <- AGAR %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
AGAR_SIR <- distinct(rbind(AGAR_SI, AGAR_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(AGAR_SIR, Group == "pred_S_I")$S_I
## [1] 178
table(AGAR_SIR$S_I)
##
##  19  52  71 159 178
##   1   1   1   1   1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
AGAR_R_prediction <- matrix(
  c(
    with(subset(AGAR_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(AGAR_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
AGAR_R_prediction_summary <- metrics(AGAR_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
AGAR_NWT_prediction <- matrix(
  c(
    with(subset(AGAR_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(AGAR_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
AGAR_NWT_prediction_summary <- metrics(AGAR_NWT_prediction)
# Milan: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
Milan_SI <- Milan %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
Milan_R <- Milan %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
Milan_SIR <- distinct(rbind(Milan_SI, Milan_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(Milan_SIR, Group == "pred_S_I")$S_I
## [1] 9
table(Milan_SIR$S_I)
##
## 0 2 9
## 1 2 2

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
Milan_R_prediction <- matrix(
  c(
    with(subset(Milan_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(Milan_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
Milan_R_prediction_summary <- metrics(Milan_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
Milan_NWT_prediction <- matrix(
  c(
    with(subset(Milan_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(Milan_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
Milan_NWT_prediction_summary <- metrics(Milan_NWT_prediction)
# HailuEthiopia: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
HailuEthiopia_SI <- HailuEthiopia %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
HailuEthiopia_R <- HailuEthiopia %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
HailuEthiopia_SIR <- distinct(rbind(HailuEthiopia_SI, HailuEthiopia_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(HailuEthiopia_SIR, Group == "pred_S_I")$S_I
## [1] 42
table(HailuEthiopia_SIR$S_I)
##
##  0 42
##  3  2

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
HailuEthiopia_R_prediction <- matrix(
  c(
    with(subset(HailuEthiopia_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(HailuEthiopia_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
HailuEthiopia_R_prediction_summary <- metrics(HailuEthiopia_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
HailuEthiopia_NWT_prediction <- matrix(
  c(
    with(subset(HailuEthiopia_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(HailuEthiopia_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
HailuEthiopia_NWT_prediction_summary <- metrics(HailuEthiopia_NWT_prediction)
# KayamaJapan: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
KayamaJapan_SI <- KayamaJapan %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
KayamaJapan_R <- KayamaJapan %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
KayamaJapan_SIR <- distinct(rbind(KayamaJapan_SI, KayamaJapan_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(KayamaJapan_SIR, Group == "pred_S_I")$S_I
## [1] 229
table(KayamaJapan_SIR$S_I)
##
##  17 156 173 212 229
##   1   1   1   1   1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
KayamaJapan_R_prediction <- matrix(
  c(
    with(subset(KayamaJapan_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(KayamaJapan_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
KayamaJapan_R_prediction_summary <- metrics(KayamaJapan_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
KayamaJapan_NWT_prediction <- matrix(
  c(
    with(subset(KayamaJapan_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(KayamaJapan_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
KayamaJapan_NWT_prediction_summary <- metrics(KayamaJapan_NWT_prediction)
# FeaseyMalawi: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
FeaseyMalawi_SI <- FeaseyMalawi %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
FeaseyMalawi_R <- FeaseyMalawi %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
FeaseyMalawi_SIR <- distinct(rbind(FeaseyMalawi_SI, FeaseyMalawi_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(FeaseyMalawi_SIR, Group == "pred_S_I")$S_I
## [1] 311
table(FeaseyMalawi_SIR$S_I)
##
##   5  15  20 306 311
##   1   1   1   1   1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
FeaseyMalawi_R_prediction <- matrix(
  c(
    with(subset(FeaseyMalawi_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(FeaseyMalawi_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
FeaseyMalawi_R_prediction_summary <- metrics(FeaseyMalawi_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
FeaseyMalawi_NWT_prediction <- matrix(
  c(
    with(subset(FeaseyMalawi_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(FeaseyMalawi_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
FeaseyMalawi_NWT_prediction_summary <- metrics(FeaseyMalawi_NWT_prediction)
# EUSCAPE_EURECA: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
EUSCAPE_EURECA_SI <- EUSCAPE_EURECA %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
EUSCAPE_EURECA_R <- EUSCAPE_EURECA %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
EUSCAPE_EURECA_SIR <- distinct(rbind(EUSCAPE_EURECA_SI, EUSCAPE_EURECA_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(EUSCAPE_EURECA_SIR, Group == "pred_S_I")$S_I
## [1] 430
table(EUSCAPE_EURECA_SIR$S_I)
##
##   2  16  18 428 430
##   1   1   1   1   1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
EUSCAPE_EURECA_R_prediction <- matrix(
  c(
    with(subset(EUSCAPE_EURECA_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(EUSCAPE_EURECA_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
EUSCAPE_EURECA_R_prediction_summary <- metrics(EUSCAPE_EURECA_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
EUSCAPE_EURECA_NWT_prediction <- matrix(
  c(
    with(subset(EUSCAPE_EURECA_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(EUSCAPE_EURECA_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
EUSCAPE_EURECA_NWT_prediction_summary <- metrics(EUSCAPE_EURECA_NWT_prediction)
# EgliSwitzerland: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
EgliSwitzerland_SI <- EgliSwitzerland %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
EgliSwitzerland_R <- EgliSwitzerland %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
EgliSwitzerland_SIR <- distinct(rbind(EgliSwitzerland_SI, EgliSwitzerland_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(EgliSwitzerland_SIR, Group == "pred_S_I")$S_I
## [1] 13
table(EgliSwitzerland_SIR$S_I)
##
##  1  6  7 12 13
##  1  1  1  1  1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
EgliSwitzerland_R_prediction <- matrix(
  c(
    with(subset(EgliSwitzerland_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(EgliSwitzerland_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
EgliSwitzerland_R_prediction_summary <- metrics(EgliSwitzerland_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
EgliSwitzerland_NWT_prediction <- matrix(
  c(
    with(subset(EgliSwitzerland_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(EgliSwitzerland_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
EgliSwitzerland_NWT_prediction_summary <- metrics(EgliSwitzerland_NWT_prediction)
# HRYC_Spain: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
HRYC_Spain_SI <- HRYC_Spain %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
HRYC_Spain_R <- HRYC_Spain %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
HRYC_Spain_SIR <- distinct(rbind(HRYC_Spain_SI, HRYC_Spain_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(HRYC_Spain_SIR, Group == "pred_S_I")$S_I
## [1] 11
table(HRYC_Spain_SIR$S_I)
##
##  0 11 16
##  1  2  2

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
HRYC_Spain_R_prediction <- matrix(
  c(
    with(subset(HRYC_Spain_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(HRYC_Spain_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
HRYC_Spain_R_prediction_summary <- metrics(HRYC_Spain_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
HRYC_Spain_NWT_prediction <- matrix(
  c(
    with(subset(HRYC_Spain_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(HRYC_Spain_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
HRYC_Spain_NWT_prediction_summary <- metrics(HRYC_Spain_NWT_prediction)
# IndiaGHRU: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
IndiaGHRU_SI <- IndiaGHRU %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
IndiaGHRU_R <- IndiaGHRU %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
IndiaGHRU_SIR <- distinct(rbind(IndiaGHRU_SI, IndiaGHRU_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(IndiaGHRU_SIR, Group == "pred_S_I")$S_I
## [1] 70
table(IndiaGHRU_SIR$S_I)
##
##  2 25 27 68 70
##  1  1  1  1  1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
IndiaGHRU_R_prediction <- matrix(
  c(
    with(subset(IndiaGHRU_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(IndiaGHRU_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
IndiaGHRU_R_prediction_summary <- metrics(IndiaGHRU_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
IndiaGHRU_NWT_prediction <- matrix(
  c(
    with(subset(IndiaGHRU_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(IndiaGHRU_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
IndiaGHRU_NWT_prediction_summary <- metrics(IndiaGHRU_NWT_prediction)
# NeumannGermany: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
NeumannGermany_SI <- NeumannGermany %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
NeumannGermany_R <- NeumannGermany %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
NeumannGermany_SIR <- distinct(rbind(NeumannGermany_SI, NeumannGermany_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(NeumannGermany_SIR, Group == "pred_S_I")$S_I
## [1] 51
table(NeumannGermany_SIR$S_I)
##
##  0  3 51
##  1  2  2

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
NeumannGermany_R_prediction <- matrix(
  c(
    with(subset(NeumannGermany_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(NeumannGermany_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
NeumannGermany_R_prediction_summary <- metrics(NeumannGermany_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
NeumannGermany_NWT_prediction <- matrix(
  c(
    with(subset(NeumannGermany_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(NeumannGermany_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
NeumannGermany_NWT_prediction_summary <- metrics(NeumannGermany_NWT_prediction)
# Whitelaw_CHAMPS: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
Whitelaw_CHAMPS_SI <- Whitelaw_CHAMPS %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
Whitelaw_CHAMPS_R <- Whitelaw_CHAMPS %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
Whitelaw_CHAMPS_SIR <- distinct(rbind(Whitelaw_CHAMPS_SI, Whitelaw_CHAMPS_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(Whitelaw_CHAMPS_SIR, Group == "pred_S_I")$S_I
## [1] 18
table(Whitelaw_CHAMPS_SIR$S_I)
##
##  0  1 18
##  1  2  2

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
Whitelaw_CHAMPS_R_prediction <- matrix(
  c(
    with(subset(Whitelaw_CHAMPS_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(Whitelaw_CHAMPS_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
Whitelaw_CHAMPS_R_prediction_summary <- metrics(Whitelaw_CHAMPS_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
Whitelaw_CHAMPS_NWT_prediction <- matrix(
  c(
    with(subset(Whitelaw_CHAMPS_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(Whitelaw_CHAMPS_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
Whitelaw_CHAMPS_NWT_prediction_summary <- metrics(Whitelaw_CHAMPS_NWT_prediction)
# Oxfordshire_Kpn: collapse the S/I/R prediction table into two binary comparisons.
# Rows predicted S or I, with pooled phenotype columns S_I and I_R added;
# the totals row appended by adorn_totals() is relabelled "pred_S_I".
Oxfordshire_Kpn_SI <- Oxfordshire_Kpn %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted I or R, same pooled columns; totals row becomes "pred_I_R".
Oxfordshire_Kpn_R <- Oxfordshire_Kpn %>%
  rename(Group = ...1) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals("row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables; distinct() keeps the pred_I row (present in both) once.
Oxfordshire_Kpn_SIR <- distinct(rbind(Oxfordshire_Kpn_SI, Oxfordshire_Kpn_R))

# Printed checks; the "##" lines appear to be the recorded output.
subset(Oxfordshire_Kpn_SIR, Group == "pred_S_I")$S_I
## [1] 252
table(Oxfordshire_Kpn_SIR$S_I)
##
##   6  12 246 252
##   2   1   1   1

# R vs S/I: rows = prediction (pred_S_I, pred_R), columns = phenotype (S_I, R).
Oxfordshire_Kpn_R_prediction <- matrix(
  c(
    with(subset(Oxfordshire_Kpn_SIR, Group == "pred_S_I"), c(S_I, R)),
    with(subset(Oxfordshire_Kpn_SIR, Group == "pred_R"), c(S_I, R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S_I", "pred_R"), c("S_I", "R"))
)
Oxfordshire_Kpn_R_prediction_summary <- metrics(Oxfordshire_Kpn_R_prediction)

# I/R vs S: rows = prediction (pred_S, pred_I_R), columns = phenotype (S, I_R).
Oxfordshire_Kpn_NWT_prediction <- matrix(
  c(
    with(subset(Oxfordshire_Kpn_SIR, Group == "pred_S"), c(S, I_R)),
    with(subset(Oxfordshire_Kpn_SIR, Group == "pred_I_R"), c(S, I_R))
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(c("pred_S", "pred_I_R"), c("S", "I_R"))
)
Oxfordshire_Kpn_NWT_prediction_summary <- metrics(Oxfordshire_Kpn_NWT_prediction)
# Rows predicted S or I. Phenotype counts are collapsed into S_I (S + I) and
# I_R (I + R); the appended totals row is relabelled as the pooled "pred_S_I".
Oxfordshire_Kvv_SI <- Oxfordshire_Kvv %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted R or I, collapsed the same way; the totals row becomes the
# pooled "pred_I_R".
Oxfordshire_Kvv_R <- Oxfordshire_Kvv %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables and drop rows that appear in both.
Oxfordshire_Kvv_SIR <- distinct(rbind(Oxfordshire_Kvv_SI, Oxfordshire_Kvv_R))

subset(Oxfordshire_Kvv_SIR, Group == "pred_S_I")$S_I
## [1] 65
table(Oxfordshire_Kvv_SIR$S_I)
## 
##  0  1 65 
##  1  2  2

# 2x2 table for the R call: predicted S/I vs predicted R against phenotype
# S/I vs R.
Oxfordshire_Kvv_R_prediction <- with(
  Oxfordshire_Kvv_SIR,
  matrix(
    c(
      S_I[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S_I[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(Oxfordshire_Kvv_R_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S_I", "R")
)
Oxfordshire_Kvv_R_prediction_summary <- metrics(Oxfordshire_Kvv_R_prediction)

# 2x2 table for the NWT call: predicted S vs predicted I/R against phenotype
# S vs I/R.
Oxfordshire_Kvv_NWT_prediction <- with(
  Oxfordshire_Kvv_SIR,
  matrix(
    c(
      S[Group %in% "pred_S"], I_R[Group %in% "pred_S"],
      S[Group %in% "pred_I_R"], I_R[Group %in% "pred_I_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(Oxfordshire_Kvv_NWT_prediction) <- list(
  c("pred_S", "pred_I_R"),
  c("S", "I_R")
)
Oxfordshire_Kvv_NWT_prediction_summary <- metrics(Oxfordshire_Kvv_NWT_prediction)
# Rows predicted S or I. Phenotype counts are collapsed into S_I (S + I) and
# I_R (I + R); the appended totals row is relabelled as the pooled "pred_S_I".
IZSAM_Italy_SI <- IZSAM_Italy %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted R or I, collapsed the same way; the totals row becomes the
# pooled "pred_I_R".
IZSAM_Italy_R <- IZSAM_Italy %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables and drop rows that appear in both.
IZSAM_Italy_SIR <- distinct(rbind(IZSAM_Italy_SI, IZSAM_Italy_R))

subset(IZSAM_Italy_SIR, Group == "pred_S_I")$S_I
## [1] 7
table(IZSAM_Italy_SIR$S_I)
## 
## 0 3 7 
## 1 2 2

# 2x2 table for the R call: predicted S/I vs predicted R against phenotype
# S/I vs R.
IZSAM_Italy_R_prediction <- with(
  IZSAM_Italy_SIR,
  matrix(
    c(
      S_I[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S_I[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(IZSAM_Italy_R_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S_I", "R")
)
IZSAM_Italy_R_prediction_summary <- metrics(IZSAM_Italy_R_prediction)

# 2x2 table for the NWT call: predicted S vs predicted I/R against phenotype
# S vs I/R.
IZSAM_Italy_NWT_prediction <- with(
  IZSAM_Italy_SIR,
  matrix(
    c(
      S[Group %in% "pred_S"], I_R[Group %in% "pred_S"],
      S[Group %in% "pred_I_R"], I_R[Group %in% "pred_I_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(IZSAM_Italy_NWT_prediction) <- list(
  c("pred_S", "pred_I_R"),
  c("S", "I_R")
)
IZSAM_Italy_NWT_prediction_summary <- metrics(IZSAM_Italy_NWT_prediction)
# Rows predicted S or I. Phenotype counts are collapsed into S_I (S + I) and
# I_R (I + R); the appended totals row is relabelled as the pooled "pred_S_I".
LaPaz_Lazaro_SI <- LaPaz_Lazaro %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted R or I, collapsed the same way; the totals row becomes the
# pooled "pred_I_R".
LaPaz_Lazaro_R <- LaPaz_Lazaro %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables and drop rows that appear in both.
LaPaz_Lazaro_SIR <- distinct(rbind(LaPaz_Lazaro_SI, LaPaz_Lazaro_R))

subset(LaPaz_Lazaro_SIR, Group == "pred_S_I")$S_I
## [1] 11
table(LaPaz_Lazaro_SIR$S_I)
## 
##  0  1 11 
##  1  2  2

# 2x2 table for the R call: predicted S/I vs predicted R against phenotype
# S/I vs R.
LaPaz_Lazaro_R_prediction <- with(
  LaPaz_Lazaro_SIR,
  matrix(
    c(
      S_I[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S_I[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(LaPaz_Lazaro_R_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S_I", "R")
)
LaPaz_Lazaro_R_prediction_summary <- metrics(LaPaz_Lazaro_R_prediction)

# 2x2 table for the NWT call: predicted S vs predicted I/R against phenotype
# S vs I/R.
LaPaz_Lazaro_NWT_prediction <- with(
  LaPaz_Lazaro_SIR,
  matrix(
    c(
      S[Group %in% "pred_S"], I_R[Group %in% "pred_S"],
      S[Group %in% "pred_I_R"], I_R[Group %in% "pred_I_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(LaPaz_Lazaro_NWT_prediction) <- list(
  c("pred_S", "pred_I_R"),
  c("S", "I_R")
)
LaPaz_Lazaro_NWT_prediction_summary <- metrics(LaPaz_Lazaro_NWT_prediction)
# Rows predicted S or I. Phenotype counts are collapsed into S_I (S + I) and
# I_R (I + R); the appended totals row is relabelled as the pooled "pred_S_I".
HarrisMERINO_SI <- HarrisMERINO %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted R or I, collapsed the same way; the totals row becomes the
# pooled "pred_I_R".
HarrisMERINO_R <- HarrisMERINO %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables and drop rows that appear in both.
HarrisMERINO_SIR <- distinct(rbind(HarrisMERINO_SI, HarrisMERINO_R))

subset(HarrisMERINO_SIR, Group == "pred_S_I")$S_I
## [1] 13
table(HarrisMERINO_SIR$S_I)
## 
##  2  5  7  8 13 
##  1  1  1  1  1

# 2x2 table for the R call: predicted S/I vs predicted R against phenotype
# S/I vs R.
HarrisMERINO_R_prediction <- with(
  HarrisMERINO_SIR,
  matrix(
    c(
      S_I[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S_I[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(HarrisMERINO_R_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S_I", "R")
)
HarrisMERINO_R_prediction_summary <- metrics(HarrisMERINO_R_prediction)

# 2x2 table for the NWT call: predicted S vs predicted I/R against phenotype
# S vs I/R.
HarrisMERINO_NWT_prediction <- with(
  HarrisMERINO_SIR,
  matrix(
    c(
      S[Group %in% "pred_S"], I_R[Group %in% "pred_S"],
      S[Group %in% "pred_I_R"], I_R[Group %in% "pred_I_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(HarrisMERINO_NWT_prediction) <- list(
  c("pred_S", "pred_I_R"),
  c("S", "I_R")
)
HarrisMERINO_NWT_prediction_summary <- metrics(HarrisMERINO_NWT_prediction)
# Rows predicted S or I. Phenotype counts are collapsed into S_I (S + I) and
# I_R (I + R); the appended totals row is relabelled as the pooled "pred_S_I".
PachecoBrazil_SI <- PachecoBrazil %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_S", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_S_I", Group))

# Rows predicted R or I, collapsed the same way; the totals row becomes the
# pooled "pred_I_R".
PachecoBrazil_R <- PachecoBrazil %>%
  rename(Group = `...1`) %>%
  filter(Group %in% c("pred_R", "pred_I")) %>%
  rowwise() %>%
  mutate(S_I = sum(S, I), I_R = sum(I, R)) %>%
  select(-I) %>%
  adorn_totals(where = "row") %>%
  mutate(Group = gsub("Total", "pred_I_R", Group))

# Stack both tables and drop rows that appear in both.
PachecoBrazil_SIR <- distinct(rbind(PachecoBrazil_SI, PachecoBrazil_R))

subset(PachecoBrazil_SIR, Group == "pred_S_I")$S_I
## [1] 10
table(PachecoBrazil_SIR$S_I)
## 
##  0 10 
##  3  2

# 2x2 table for the R call: predicted S/I vs predicted R against phenotype
# S/I vs R.
PachecoBrazil_R_prediction <- with(
  PachecoBrazil_SIR,
  matrix(
    c(
      S_I[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S_I[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(PachecoBrazil_R_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S_I", "R")
)
PachecoBrazil_R_prediction_summary <- metrics(PachecoBrazil_R_prediction)

# 2x2 table for the NWT call: predicted S vs predicted I/R against phenotype
# S vs I/R.
PachecoBrazil_NWT_prediction <- with(
  PachecoBrazil_SIR,
  matrix(
    c(
      S[Group %in% "pred_S"], I_R[Group %in% "pred_S"],
      S[Group %in% "pred_I_R"], I_R[Group %in% "pred_I_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(PachecoBrazil_NWT_prediction) <- list(
  c("pred_S", "pred_I_R"),
  c("S", "I_R")
)
PachecoBrazil_NWT_prediction_summary <- metrics(PachecoBrazil_NWT_prediction)
# Pool all validation datasets: stack the per-dataset *_SIR tables and sum
# every count column within each prediction group (NA counts are ignored).
# across() replaces the deprecated funs() helper; the result is unchanged.
pooled_R <-
  bind_rows(
    MBIRA_Kpn_SIR,
    MBIRA_Kvv_SIR,
    HowdenGorrie_SIR,
    AGAR_SIR,
    Milan_SIR,
    HailuEthiopia_SIR,
    KayamaJapan_SIR,
    FeaseyMalawi_SIR,
    EUSCAPE_EURECA_SIR,
    EgliSwitzerland_SIR,
    HRYC_Spain_SIR,
    IndiaGHRU_SIR,
    NeumannGermany_SIR,
    Whitelaw_CHAMPS_SIR,
    Oxfordshire_Kpn_SIR,
    Oxfordshire_Kvv_SIR,
    IZSAM_Italy_SIR,
    LaPaz_Lazaro_SIR,
    HarrisMERINO_SIR,
    PachecoBrazil_SIR
  ) %>%
  group_by(Group) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE)))

# Pooled 2x2 table for the R call: predicted S/I vs predicted R (rows) against
# phenotype S/I vs R (columns).
pooled_R_prediction <- matrix(
  c(
    subset(pooled_R, Group == "pred_S_I")$S_I,
    subset(pooled_R, Group == "pred_S_I")$R,
    subset(pooled_R, Group == "pred_R")$S_I,
    subset(pooled_R, Group == "pred_R")$R
  ),
  ncol = 2, byrow = TRUE
)

colnames(pooled_R_prediction) <- c("S_I", "R")
rownames(pooled_R_prediction) <- c("pred_S_I", "pred_R")

pooled_R_summary <- metrics(pooled_R_prediction)

summary of pooled genotypes vs. SIR

# Per-dataset genotype tables: every row of the summary except the three
# pred_S / pred_I / pred_R prediction rows.
EUSCAPE_EURECA_genotype <- EUSCAPE_EURECA %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

KayamaJapan_genotype <- KayamaJapan %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

IndiaGHRU_genotype <- IndiaGHRU %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

AGAR_genotype <- AGAR %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

Oxfordshire_Kpn_genotype <- Oxfordshire_Kpn %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

# Genotype rows (everything except the pred_S / pred_I / pred_R rows) for the
# Oxfordshire Kvv summary. The source must be Oxfordshire_Kvv: reading
# Oxfordshire_Kpn here would count Kpn twice in pooled_genotype and leave the
# Kvv isolates out.
Oxfordshire_Kvv_genotype <- Oxfordshire_Kvv %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S" & Group != "pred_I" & Group != "pred_R")

# Per-dataset genotype tables: every row of the summary except the three
# pred_S / pred_I / pred_R prediction rows.
MBIRA_Kpn_genotype <- MBIRA_Kpn %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

HowdenGorrie_genotype <- HowdenGorrie %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

Milan_genotype <- Milan %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

HRYC_Spain_genotype <- HRYC_Spain %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

LaPaz_Lazaro_genotype <- LaPaz_Lazaro %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

MBIRA_Kvv_genotype <- MBIRA_Kvv %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

HarrisMERINO_genotype <- HarrisMERINO %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

PachecoBrazil_genotype <- PachecoBrazil %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

IZSAM_Italy_genotype <- IZSAM_Italy %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

HailuEthiopia_genotype <- HailuEthiopia %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

FeaseyMalawi_genotype <- FeaseyMalawi %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

EgliSwitzerland_genotype <- EgliSwitzerland %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

NeumannGermany_genotype <- NeumannGermany %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")

Whitelaw_CHAMPS_genotype <- Whitelaw_CHAMPS %>%
  rename(Group = `...1`) %>%
  filter(Group != "pred_S", Group != "pred_I", Group != "pred_R")


# Pool the genotype tables of all datasets: stack them and sum every count
# column per genotype group (NA counts are ignored). across() replaces the
# deprecated funs() helper; the result is unchanged.
pooled_genotype <-
  bind_rows(
    MBIRA_Kpn_genotype,
    MBIRA_Kvv_genotype,
    HowdenGorrie_genotype,
    AGAR_genotype,
    Milan_genotype,
    HailuEthiopia_genotype,
    KayamaJapan_genotype,
    FeaseyMalawi_genotype,
    EUSCAPE_EURECA_genotype,
    EgliSwitzerland_genotype,
    HRYC_Spain_genotype,
    IndiaGHRU_genotype,
    NeumannGermany_genotype,
    Whitelaw_CHAMPS_genotype,
    Oxfordshire_Kpn_genotype,
    Oxfordshire_Kvv_genotype,
    IZSAM_Italy_genotype,
    LaPaz_Lazaro_genotype,
    HarrisMERINO_genotype,
    PachecoBrazil_genotype
  ) %>%
  group_by(Group) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE)))
# Pool all validation datasets for the NWT call: stack the per-dataset *_SIR
# tables and sum every count column within each prediction group (NA counts
# are ignored). across() replaces the deprecated funs() helper; the result is
# unchanged.
pooled_NWT <-
  bind_rows(
    MBIRA_Kpn_SIR,
    MBIRA_Kvv_SIR,
    HowdenGorrie_SIR,
    AGAR_SIR,
    Milan_SIR,
    HailuEthiopia_SIR,
    KayamaJapan_SIR,
    FeaseyMalawi_SIR,
    EUSCAPE_EURECA_SIR,
    EgliSwitzerland_SIR,
    HRYC_Spain_SIR,
    IndiaGHRU_SIR,
    NeumannGermany_SIR,
    Whitelaw_CHAMPS_SIR,
    Oxfordshire_Kpn_SIR,
    Oxfordshire_Kvv_SIR,
    IZSAM_Italy_SIR,
    LaPaz_Lazaro_SIR,
    HarrisMERINO_SIR,
    PachecoBrazil_SIR
  ) %>%
  group_by(Group) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE)))

# Pooled 2x2 table for the NWT call: predicted S vs predicted I/R (rows)
# against phenotype S vs I/R (columns).
pooled_NWT_prediction <- matrix(
  c(
    subset(pooled_NWT, Group == "pred_S")$S,
    subset(pooled_NWT, Group == "pred_S")$I_R,
    subset(pooled_NWT, Group == "pred_I_R")$S,
    subset(pooled_NWT, Group == "pred_I_R")$I_R
  ),
  ncol = 2, byrow = TRUE
)
colnames(pooled_NWT_prediction) <- c("S", "I_R")
rownames(pooled_NWT_prediction) <- c("pred_S", "pred_I_R")

pooled_NWT_summary <- metrics(pooled_NWT_prediction)

TOTAL SIR counts per dataset

# Stack the per-dataset tables, tagging each row with the name of the object
# it came from (lst() names its elements after the variables). The pooled
# pred_S_I / pred_I_R total rows are dropped so that only the individual
# prediction rows are summed below.
SIR_counts_table <- bind_rows(
  lst(
    EUSCAPE_EURECA_SIR,
    KayamaJapan_SIR,
    IndiaGHRU_SIR,
    AGAR_SIR,
    Oxfordshire_Kpn_SIR,
    IZSAM_Italy_SIR,
    MBIRA_Kpn_SIR,
    HowdenGorrie_SIR,
    Milan_SIR,
    HRYC_Spain_SIR,
    LaPaz_Lazaro_SIR,
    MBIRA_Kvv_SIR,
    Oxfordshire_Kvv_SIR,
    NeumannGermany_SIR,
    PachecoBrazil_SIR,
    HarrisMERINO_SIR,
    FeaseyMalawi_SIR,
    EgliSwitzerland_SIR,
    HailuEthiopia_SIR,
    Whitelaw_CHAMPS_SIR
  ),
  .id = "Dataset"
) %>%
  filter(Group != "pred_S_I" & Group != "pred_I_R")

# Strip the trailing "_SIR" so Dataset holds the bare dataset name.
SIR_counts_table$Dataset <- sub("_SIR$", "", SIR_counts_table$Dataset)

SIR_counts_table <- SIR_counts_table %>% select(-Group, -S_I)

# Per-dataset totals of the S, R and I_R phenotype columns.
S_counts_table <-
  SIR_counts_table %>%
  group_by(Dataset) %>%
  summarise(S_total = sum(S))

R_counts_table <-
  SIR_counts_table %>%
  group_by(Dataset) %>%
  summarise(R_total = sum(R))

IR_counts_table <-
  SIR_counts_table %>%
  group_by(Dataset) %>%
  summarise(IR_total = sum(I_R))

# Join on an explicit key: this avoids the "Joining with `by = ...`" message
# and protects against any other shared column silently becoming a join key.
SR_table <- full_join(S_counts_table, R_counts_table, by = "Dataset")
SIR_table <- full_join(SR_table, IR_counts_table, by = "Dataset") %>%
  mutate(I_total = IR_total - R_total) %>% # I = (I + R) - R
  mutate(N_total = S_total + IR_total)     # N = S + (I + R)

# Replace internal dataset identifiers with their display labels. Names not
# listed here are left unchanged.
SIR_table$Dataset <- local({
  display_labels <- c(
    AGAR = "AGAR GnSOP",
    EUSCAPE_EURECA = "EUSCAPE & EURECA",
    EgliSwitzerland = "University of Zurich",
    FeaseyMalawi = "Queen Elizabeth Central Hospital",
    HRYC_Spain = "HURYC & CIBERINFEC",
    HailuEthiopia = "Addis Ababa University*",
    HowdenGorrie = "Controlling Superbugs study & Victorian CPE program",
    IZSAM_Italy = "IZSAM",
    IndiaGHRU = "India GHRU",
    KayamaJapan = "JARBS-GNR",
    LaPaz_Lazaro = "HULP",
    MBIRA_Kpn = "MBIRA",
    MBIRA_Kvv = "MBIRA*",
    Milan = "IRCCS Ospedale San Raffaele",
    NeumannGermany = "SCHARKI",
    Oxfordshire_Kpn = "Oxfordshire",
    Oxfordshire_Kvv = "Oxfordshire*",
    Whitelaw_CHAMPS = "CHAMPS"
  )
  dataset_ids <- SIR_table$Dataset
  has_label <- dataset_ids %in% names(display_labels)
  dataset_ids[has_label] <- unname(display_labels[dataset_ids[has_label]])
  dataset_ids
})

Combining validation sets R and NWT

# One row per dataset and prediction type (R first, then NWT), with the pooled
# results last. The row order must match the labels attached below.
validation_summary <- rbind(
  EUSCAPE_EURECA_R_prediction_summary$summary,
  EUSCAPE_EURECA_NWT_prediction_summary$summary,
  KayamaJapan_R_prediction_summary$summary,
  KayamaJapan_NWT_prediction_summary$summary,
  IndiaGHRU_R_prediction_summary$summary,
  IndiaGHRU_NWT_prediction_summary$summary,
  AGAR_R_prediction_summary$summary,
  AGAR_NWT_prediction_summary$summary,
  Oxfordshire_Kpn_R_prediction_summary$summary,
  Oxfordshire_Kpn_NWT_prediction_summary$summary,
  IZSAM_Italy_R_prediction_summary$summary,
  IZSAM_Italy_NWT_prediction_summary$summary,
  MBIRA_Kpn_R_prediction_summary$summary,
  MBIRA_Kpn_NWT_prediction_summary$summary,
  HowdenGorrie_R_prediction_summary$summary,
  HowdenGorrie_NWT_prediction_summary$summary,
  Milan_R_prediction_summary$summary,
  Milan_NWT_prediction_summary$summary,
  HRYC_Spain_R_prediction_summary$summary,
  HRYC_Spain_NWT_prediction_summary$summary,
  LaPaz_Lazaro_R_prediction_summary$summary,
  LaPaz_Lazaro_NWT_prediction_summary$summary,
  MBIRA_Kvv_R_prediction_summary$summary,
  MBIRA_Kvv_NWT_prediction_summary$summary,
  Oxfordshire_Kvv_R_prediction_summary$summary,
  Oxfordshire_Kvv_NWT_prediction_summary$summary,
  NeumannGermany_R_prediction_summary$summary,
  NeumannGermany_NWT_prediction_summary$summary,
  PachecoBrazil_R_prediction_summary$summary,
  PachecoBrazil_NWT_prediction_summary$summary,
  HarrisMERINO_R_prediction_summary$summary,
  HarrisMERINO_NWT_prediction_summary$summary,
  FeaseyMalawi_R_prediction_summary$summary,
  FeaseyMalawi_NWT_prediction_summary$summary,
  EgliSwitzerland_R_prediction_summary$summary,
  EgliSwitzerland_NWT_prediction_summary$summary,
  HailuEthiopia_R_prediction_summary$summary,
  HailuEthiopia_NWT_prediction_summary$summary,
  Whitelaw_CHAMPS_R_prediction_summary$summary,
  Whitelaw_CHAMPS_NWT_prediction_summary$summary,
  pooled_R_summary$summary,
  pooled_NWT_summary$summary
)

# Attach the prediction type, display name and country to each row, then
# render the proportion columns as percentages rounded to two decimals.
validation_summary_table <- as_tibble(validation_summary) %>%
  mutate(
    # 21 R/NWT pairs: 20 datasets plus the pooled rows.
    Prediction = rep(c("R", "NWT"), times = 21),
    Dataset = rep(
      c(
        "EUSCAPE & EURECA",
        "JARBS-GNR",
        "India GHRU",
        "AGAR GnSOP",
        "Oxfordshire",
        "IZSAM",
        "MBIRA",
        "Controlling Superbugs study & Victorian CPE program",
        "IRCCS Ospedale San Raffaele",
        "HURYC & CIBERINFEC",
        "HULP",
        "MBIRA*",
        "Oxfordshire*",
        "SCHARKI",
        "PachecoBrazil",
        "HarrisMERINO",
        "Queen Elizabeth Central Hospital",
        "University of Zurich",
        "Addis Ababa University*",
        "CHAMPS",
        "Pooled"
      ),
      each = 2
    ),
    Country = rep(
      c(
        "Various European countries",
        "Japan",
        "India",
        "Australia",
        "United Kingdom",
        "Italy",
        "Tanzania, Ethiopia, Malawi, Zambia, Ghana, Nigeria, South Africa",
        "Australia",
        "Italy",
        "Spain, Portugal",
        "Spain",
        "Ethiopia, South Africa, Tanzania",
        "United Kingdom",
        "Germany",
        "Brazil",
        "Various countries",
        "Malawi",
        "Switzerland",
        "Ethiopia",
        "South Africa",
        ""
      ),
      each = 2
    )
  ) %>%
  rename(
    Sensitivity = sens,
    Specificity = spec,
    `ME` = me,
    `VME` = vme,
    `Categorical agreement` = cat,
    `%R` = perc_R,
    N = n
  ) %>%
  mutate(across(
    c(Sensitivity, Specificity, ME, VME, `Categorical agreement`, `%R`),
    ~ paste0(round(as.numeric(.x) * 100, 2), "%")
  ))
 
  # %>% select(Prediction, Dataset, Country, `Categorical agreement`, Sensitivity, Specificity, ME, VME, `%R` ,N)

#write_csv(validation_summary_table, "../tables/Table3_extValidation_evalMetrics.csv", na="-")

Validation sets - removing I

# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
MBIRA_Kpn_excludeI_prediction <- with(
  MBIRA_Kpn_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(MBIRA_Kpn_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
MBIRA_Kpn_excludeI_prediction_summary <- metrics(MBIRA_Kpn_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
MBIRA_Kvv_excludeI_prediction <- with(
  MBIRA_Kvv_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(MBIRA_Kvv_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
MBIRA_Kvv_excludeI_prediction_summary <- metrics(MBIRA_Kvv_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
HowdenGorrie_excludeI_prediction <- with(
  HowdenGorrie_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(HowdenGorrie_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
HowdenGorrie_excludeI_prediction_summary <- metrics(HowdenGorrie_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
AGAR_excludeI_prediction <- with(
  AGAR_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(AGAR_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
AGAR_excludeI_prediction_summary <- metrics(AGAR_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
Milan_excludeI_prediction <- with(
  Milan_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(Milan_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
Milan_excludeI_prediction_summary <- metrics(Milan_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
HailuEthiopia_excludeI_prediction <- with(
  HailuEthiopia_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(HailuEthiopia_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
HailuEthiopia_excludeI_prediction_summary <- metrics(HailuEthiopia_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
KayamaJapan_excludeI_prediction <- with(
  KayamaJapan_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(KayamaJapan_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
KayamaJapan_excludeI_prediction_summary <- metrics(KayamaJapan_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
FeaseyMalawi_excludeI_prediction <- with(
  FeaseyMalawi_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(FeaseyMalawi_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
FeaseyMalawi_excludeI_prediction_summary <- metrics(FeaseyMalawi_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
EUSCAPE_EURECA_excludeI_prediction <- with(
  EUSCAPE_EURECA_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(EUSCAPE_EURECA_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
EUSCAPE_EURECA_excludeI_prediction_summary <- metrics(EUSCAPE_EURECA_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
EgliSwitzerland_excludeI_prediction <- with(
  EgliSwitzerland_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(EgliSwitzerland_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
EgliSwitzerland_excludeI_prediction_summary <- metrics(EgliSwitzerland_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
HRYC_Spain_excludeI_prediction <- with(
  HRYC_Spain_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(HRYC_Spain_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
HRYC_Spain_excludeI_prediction_summary <- metrics(HRYC_Spain_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
IndiaGHRU_excludeI_prediction <- with(
  IndiaGHRU_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(IndiaGHRU_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
IndiaGHRU_excludeI_prediction_summary <- metrics(IndiaGHRU_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
NeumannGermany_excludeI_prediction <- with(
  NeumannGermany_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(NeumannGermany_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
NeumannGermany_excludeI_prediction_summary <- metrics(NeumannGermany_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
Whitelaw_CHAMPS_excludeI_prediction <- with(
  Whitelaw_CHAMPS_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(Whitelaw_CHAMPS_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
Whitelaw_CHAMPS_excludeI_prediction_summary <- metrics(Whitelaw_CHAMPS_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
Oxfordshire_Kpn_excludeI_prediction <- with(
  Oxfordshire_Kpn_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(Oxfordshire_Kpn_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
Oxfordshire_Kpn_excludeI_prediction_summary <- metrics(Oxfordshire_Kpn_excludeI_prediction)
# 2x2 table using only the S and R phenotype columns (I is left out):
# predicted S/I vs predicted R against phenotype S vs R.
Oxfordshire_Kvv_excludeI_prediction <- with(
  Oxfordshire_Kvv_SIR,
  matrix(
    c(
      S[Group %in% "pred_S_I"], R[Group %in% "pred_S_I"],
      S[Group %in% "pred_R"], R[Group %in% "pred_R"]
    ),
    ncol = 2, byrow = TRUE
  )
)
dimnames(Oxfordshire_Kvv_excludeI_prediction) <- list(
  c("pred_S_I", "pred_R"),
  c("S", "R")
)
Oxfordshire_Kvv_excludeI_prediction_summary <- metrics(Oxfordshire_Kvv_excludeI_prediction)
IZSAM_Italy_excludeI_prediction <- matrix(c(subset(IZSAM_Italy_SIR, Group == "pred_S_I")$S, 
                subset(IZSAM_Italy_SIR, Group == "pred_S_I")$R,
                subset(IZSAM_Italy_SIR, Group == "pred_R")$S,
                subset(IZSAM_Italy_SIR, Group == "pred_R")$R), ncol=2, byrow=TRUE)

colnames(IZSAM_Italy_excludeI_prediction) <- c('S','R')
rownames(IZSAM_Italy_excludeI_prediction) <- c('pred_S_I','pred_R')

IZSAM_Italy_excludeI_prediction_summary<-metrics(IZSAM_Italy_excludeI_prediction)
LaPaz_Lazaro_excludeI_prediction <- matrix(c(subset(LaPaz_Lazaro_SIR, Group == "pred_S_I")$S, 
                subset(LaPaz_Lazaro_SIR, Group == "pred_S_I")$R,
                subset(LaPaz_Lazaro_SIR, Group == "pred_R")$S,
                subset(LaPaz_Lazaro_SIR, Group == "pred_R")$R), ncol=2, byrow=TRUE)

colnames(LaPaz_Lazaro_excludeI_prediction) <- c('S','R')
rownames(LaPaz_Lazaro_excludeI_prediction) <- c('pred_S_I','pred_R')

LaPaz_Lazaro_excludeI_prediction_summary<-metrics(LaPaz_Lazaro_excludeI_prediction)
HarrisMERINO_excludeI_prediction <- matrix(c(subset(HarrisMERINO_SIR, Group == "pred_S_I")$S, 
                subset(HarrisMERINO_SIR, Group == "pred_S_I")$R,
                subset(HarrisMERINO_SIR, Group == "pred_R")$S,
                subset(HarrisMERINO_SIR, Group == "pred_R")$R), ncol=2, byrow=TRUE)

colnames(HarrisMERINO_excludeI_prediction) <- c('S','R')
rownames(HarrisMERINO_excludeI_prediction) <- c('pred_S_I','pred_R')

HarrisMERINO_excludeI_prediction_summary<-metrics(HarrisMERINO_excludeI_prediction)
PachecoBrazil_excludeI_prediction <- matrix(c(subset(PachecoBrazil_SIR, Group == "pred_S_I")$S, 
                subset(PachecoBrazil_SIR, Group == "pred_S_I")$R,
                subset(PachecoBrazil_SIR, Group == "pred_R")$S,
                subset(PachecoBrazil_SIR, Group == "pred_R")$R), ncol=2, byrow=TRUE)

colnames(PachecoBrazil_excludeI_prediction) <- c('S','R')
rownames(PachecoBrazil_excludeI_prediction) <- c('pred_S_I','pred_R')

PachecoBrazil_excludeI_prediction_summary<-metrics(PachecoBrazil_excludeI_prediction)
# Pool the per-dataset count tables: stack them, then sum every remaining
# column within each prediction group (`Group`), ignoring missing counts.
# summarise_all(sum, na.rm = TRUE) replaces the deprecated funs() wrapper and
# produces the same column names.
pooled_exclude_I <-
  bind_rows(MBIRA_Kpn_SIR,
            MBIRA_Kvv_SIR,
            HowdenGorrie_SIR,
            AGAR_SIR,
            Milan_SIR,
            HailuEthiopia_SIR,
            KayamaJapan_SIR,
            FeaseyMalawi_SIR,
            EUSCAPE_EURECA_SIR,
            EgliSwitzerland_SIR,
            HRYC_Spain_SIR,
            IndiaGHRU_SIR,
            NeumannGermany_SIR,
            Whitelaw_CHAMPS_SIR,
            Oxfordshire_Kpn_SIR,
            Oxfordshire_Kvv_SIR,
            IZSAM_Italy_SIR,
            LaPaz_Lazaro_SIR,
            HarrisMERINO_SIR,
            PachecoBrazil_SIR) %>%
  group_by(Group) %>%
  summarise_all(sum, na.rm = TRUE)

# Pooled 2x2 table with I phenotypes left out:
# row 1 = pred_S_I counts, row 2 = pred_R counts; columns = observed S, R.
pooled_excludeI_prediction <- matrix(c(subset(pooled_exclude_I, Group == "pred_S_I")$S,
                subset(pooled_exclude_I, Group == "pred_S_I")$R,
                subset(pooled_exclude_I, Group == "pred_R")$S,
                subset(pooled_exclude_I, Group == "pred_R")$R), ncol=2, byrow=TRUE)

colnames(pooled_excludeI_prediction) <- c('S','R')
rownames(pooled_excludeI_prediction) <- c('pred_S_I','pred_R')

pooled_excludeI_summary<-metrics(pooled_excludeI_prediction)

Combining validation sets - excluding I

# Row-bind the per-dataset `$summary` results (pooled result last). The order
# here must stay aligned with the Dataset / Country vectors further down.
validation_summary <- do.call(rbind, list(
  EUSCAPE_EURECA_excludeI_prediction_summary$summary,
  KayamaJapan_excludeI_prediction_summary$summary,
  IndiaGHRU_excludeI_prediction_summary$summary,
  AGAR_excludeI_prediction_summary$summary,
  Oxfordshire_Kpn_excludeI_prediction_summary$summary,
  IZSAM_Italy_excludeI_prediction_summary$summary,
  MBIRA_Kpn_excludeI_prediction_summary$summary,
  HowdenGorrie_excludeI_prediction_summary$summary,
  Milan_excludeI_prediction_summary$summary,
  HRYC_Spain_excludeI_prediction_summary$summary,
  LaPaz_Lazaro_excludeI_prediction_summary$summary,
  MBIRA_Kvv_excludeI_prediction_summary$summary,
  Oxfordshire_Kvv_excludeI_prediction_summary$summary,
  NeumannGermany_excludeI_prediction_summary$summary,
  PachecoBrazil_excludeI_prediction_summary$summary,
  HarrisMERINO_excludeI_prediction_summary$summary,
  FeaseyMalawi_excludeI_prediction_summary$summary,
  EgliSwitzerland_excludeI_prediction_summary$summary,
  HailuEthiopia_excludeI_prediction_summary$summary,
  Whitelaw_CHAMPS_excludeI_prediction_summary$summary,
  pooled_excludeI_summary$summary
))

# Annotate each row with its prediction type, display name and country, give
# the metric columns their table headings, and format the six proportion
# columns as percentages rounded to two decimals.
validation_summary_table_excludeI <- validation_summary %>%
  as_tibble() %>%
  mutate(
    Prediction = "R_excludeI",
    Dataset = c("EUSCAPE & EURECA","JARBS-GNR", "India GHRU", "AGAR GnSOP","Oxfordshire","IZSAM", "MBIRA", "Controlling Superbugs study & Victorian CPE program", "IRCCS Ospedale San Raffaele", "HURYC & CIBERINFEC", "HULP", "MBIRA*","Oxfordshire*", "SCHARKI","PachecoBrazil","HarrisMERINO", "Queen Elizabeth Central Hospital", "University of Zurich", "Addis Ababa University*", "CHAMPS", "Pooled"),
    Country = c("Various European countries","Japan", "India", "Australia","United Kingdom","Italy","Tanzania, Ethiopia, Malawi, Zambia, Ghana, Nigeria, South Africa", "Australia", "Italy","Spain, Portugal", "Spain", "Ethiopia, South Africa, Tanzania" ,"United Kingdom", "Germany", "Brazil", "Various countries", "Malawi","Switzerland", "Ethiopia", "South Africa", "")
  ) %>%
  rename(Sensitivity=sens, Specificity=spec, `ME`=me, `VME`=vme, `Categorical agreement`=cat, `%R`=perc_R, N=n) %>%
  mutate_at(
    c("Sensitivity", "Specificity", "ME", "VME", "Categorical agreement", "%R"),
    function(proportion) paste0(round(as.numeric(proportion)*100,2),"%")
  )
#%>% select(Prediction, Dataset, Country, `Categorical agreement`, Sensitivity, Specificity, ME, VME, `%R` ,N)

#write_csv(validation_summary_table_excludeI, "../tables/Table3_extValidation_evalMetrics_excludeI.csv", na="-")

Plotting validation sets (only R_excludeI)

# Long form: one row per dataset x evaluation metric, with the percentage
# string turned back into a number (column `metric_value`).
validation_summary_table_long <- validation_summary_table_excludeI %>%
  gather(`Evaluation metric`, value, `Categorical agreement`:`VME`) %>%
  mutate(value = as.numeric(gsub("%", "", value))) %>%
  rename(metric_value = value)

# Reshape the confidence-interval columns (cat_lb ... vme_ub) so each row
# carries `lb` / `ub` for one interval type (`var`), then keep only the rows
# where the interval type matches the row's evaluation metric and the
# prediction is R_excludeI.
validation_summary_table_long_ci <- validation_summary_table_long %>%
  gather(v, value, cat_lb:vme_ub) %>%
  separate(v, c("var", "col")) %>%
  arrange(Dataset) %>%
  spread(col, value) %>%
  filter(
    (`Evaluation metric` == "Categorical agreement" & var == "cat") |
      (`Evaluation metric` == "ME" & var == "me") |
      (`Evaluation metric` == "VME" & var == "vme"),
    Prediction == "R_excludeI"
  )

# Fix the facet order of the three plotted metrics.
validation_summary_table_long_ci$`Evaluation metric` <- factor(
  validation_summary_table_long_ci$`Evaluation metric`,
  levels = c("Categorical agreement","ME", 'VME')
)

#validation_summary_table_median<-
#validation_summary_table_long_ci %>% group_by(`Evaluation metric`) %>% 
#  summarise(metric_value = median(as.numeric(metric_value), na.rm = T)) %>%
#  mutate(Dataset="Overall median") %>%
#  mutate(var="") %>%
#  mutate(ub="") %>%
#  mutate(lb="") %>%
#  mutate(Country="") %>%
#  mutate(N="") %>%
#  mutate(`%R`="") %>%
#  mutate(`Prediction`="")

#validation_summary_table_total <-
#rbind(validation_summary_table_long_ci)

# Attach the per-dataset S/I/R totals and build the axis label
# "<Dataset> (S=<n>, R=<n>)"; the pooled row is renamed first.
validation_summary_table_total <- validation_summary_table_long_ci %>%
  full_join(SIR_table) %>%
  mutate(
    Dataset = gsub("Pooled", "All datasets combined", Dataset),
    Dataset_label = paste0(Dataset, " (S=", S_total, ", R=", R_total, ")")
  )

#validation_summary_table_total$Dataset_label[validation_summary_table_total$Dataset_label == 'Overall median (S=NA, R=NA)'] <- 'Overall median'

# Get totals to explicitly write in label
colSums(SIR_table[,-1])
##  S_total  R_total IR_total  I_total  N_total 
##     1665     4462     5094      632     6759

# The pooled row has no totals after the join, so its label is written out
# by hand; then the dataset names are swapped for their display codes.
# The substitutions run strictly in the listed order: the XXXXXXX / YYYYYYYY
# placeholders protect the starred names from the unstarred patterns.
validation_summary_table_total <- validation_summary_table_total %>%
  mutate(
    Dataset_label = replace(
      Dataset_label,
      Dataset_label == 'All datasets combined (S=NA, R=NA)',
      "All datasets combined (S=1665, R=4462)"
    )
  ) %>%
  mutate(
    Dataset_label = Reduce(
      function(label, swap) gsub(swap[1], swap[2], label),
      list(
        c("EUSCAPE & EURECA", "A (EUCAST)"),
        c("India GHRU", "B (EUCAST)"),
        c("AGAR GnSOP", "C (EUCAST)"),
        c("Oxfordshire\\*", "XXXXXXX"),
        c("Oxfordshire", "D (EUCAST)"),
        c("XXXXXXX", "Oxfordshire (Kvv, EUCAST)"),
        c("HURYC & CIBERINFEC", "E (EUCAST)"),
        c("HULP", "F (EUCAST)"),
        c("JARBS-GNR", "G (CLSI)"),
        c("MBIRA\\*", "YYYYYYYY"),
        c("MBIRA", "H (CLSI)"),
        c("YYYYYYYY", "MBIRA (Kvv, CLSI)"),
        c("Controlling Superbugs study & Victorian CPE program", "I (CLSI)"),
        c("Queen Elizabeth Central Hospital", "J* (EUCAST/BSAC)"),
        c("Addis Ababa University\\*", "K* (Kvv, CLSI)"),
        c("University of Zurich", "L* (EUCAST)"),
        c("CHAMPS", "M* (CLSI)")
      ),
      Dataset_label
    )
  )

# Fix the display order of the labels (labels not listed here become NA).
validation_summary_table_total$Dataset_label <- factor(
  as.character(validation_summary_table_total$Dataset_label),
  levels = c("A (EUCAST) (S=291, R=1687)", "B (EUCAST) (S=84, R=441)","C (EUCAST) (S=168, R=255)", "D (EUCAST) (S=242, R=37)", "E (EUCAST) (S=20, R=146)", "F (EUCAST) (S=11, R=136)", "G (CLSI) (S=222, R=449)","H (CLSI) (S=113, R=129)", "I (CLSI) (S=18, R=153)", "J* (EUCAST/BSAC) (S=196, R=411)","K* (Kvv, CLSI) (S=37, R=28)","L* (EUCAST) (S=11, R=53)", "M* (CLSI) (S=18, R=21)", "IZSAM (S=7, R=273)",  "IRCCS Ospedale San Raffaele (S=9, R=187)", "MBIRA (Kvv, CLSI) (S=86, R=5)",  "Oxfordshire (Kvv, EUCAST) (S=64, R=0)", "SCHARKI (S=51, R=2)", "PachecoBrazil (S=10, R=28)", "HarrisMERINO (S=7, R=21)","All datasets combined (S=1665, R=4462)", "Overall median")
)




# Keep datasets with more than 10 S, more than 10 R and more than 25 genomes
# in total. Rows whose totals are missing pass through the is.na() branches.
# NOTE(review): rows with missing totals therefore also feed the median
# computed below -- confirm that the pooled row is meant to be included.
validation_summary_table_total_exclude <-
  validation_summary_table_total %>%
  filter(S_total > 10 | is.na(S_total)) %>%
  filter(R_total > 10 | is.na(R_total)) %>%
  filter(N_total > 25 | is.na(N_total))

# "Overall median" row per evaluation metric. Every other column is filled
# with "" so the row has the same columns as the table it is bound to.
validation_summary_table_median <-
  validation_summary_table_total_exclude %>%
  group_by(`Evaluation metric`) %>%
  summarise(metric_value = median(as.numeric(metric_value), na.rm = TRUE)) %>%
  mutate(
    Dataset = "Overall median",
    var = "",
    ub = "",
    lb = "",
    Country = "",
    N = "",
    `%R` = "",
    `Prediction` = "",
    `S_total` = "",
    `I_total` = "",
    `IR_total` = "",
    `R_total` = "",
    `N_total` = "",
    `Dataset_label` = "Overall median"
  )

validation_summary_table_total_exclude <-
  rbind(validation_summary_table_total_exclude, validation_summary_table_median)

# Clamp negative lower bounds to 0. After the rbind above `lb` holds text (the
# median row contributes ""), so the comparison is done on the numeric value;
# comparing the text itself against 0 would be a locale-dependent string
# comparison. Rows without a numeric bound are left untouched.
lb_numeric <- as.numeric(validation_summary_table_total_exclude$lb)
validation_summary_table_total_exclude$lb[!is.na(lb_numeric) & lb_numeric < 0] <- 0

# One facet per metric: point = metric value, horizontal range = lb..ub
# (stored as proportions, hence * 100), vertical line = reference threshold
# (90 for categorical agreement, 3 for ME, 1.5 for VME).
validation_summary_table_plot <-ggplot(validation_summary_table_total_exclude, aes(as.numeric(metric_value),(Dataset_label))) +
  geom_point(aes(), alpha=0.5, col="#9f98e9", size=2) +
  geom_linerange(aes(xmin=as.numeric(lb)*100, xmax=as.numeric(ub)*100), col="#887feb", position=pd, size=0.5) +
  scale_y_discrete(limits=rev)+
  scale_x_continuous(breaks=seq(0, 100, 20)) +
  facet_grid(.~`Evaluation metric`, scales = "free", space='free')+ 
  labs(y="", x = "%") + 
  theme_bw()+
  theme(legend.title=element_blank())+
  theme(axis.text.x = element_text(angle = 45, hjust=1)) +
  geom_vline(data=filter(validation_summary_table_total_exclude, `Evaluation metric`=="Categorical agreement"), aes(xintercept=90), colour="#fb9472")+
  geom_vline(data=filter(validation_summary_table_total_exclude, `Evaluation metric`=="ME"), aes(xintercept=3), colour="#fb9472")+
  geom_vline(data=filter(validation_summary_table_total_exclude, `Evaluation metric`=="VME"), aes(xintercept=1.5), colour="#fb9472")

# Name the plot and spell out `filename` instead of relying on the implicit
# last plot and on partial argument matching.
ggsave(filename="../figs/Fig7_extValidation_summary.pdf", plot=validation_summary_table_plot, width=7, height=4)
ggsave(filename="../figs/Fig7_extValidation_summary.png", plot=validation_summary_table_plot, width=7, height=4)

TableS10 - eval metrics

# Table S10: evaluation metrics with confidence-interval bounds for every
# dataset and prediction type. The bounds are stored as proportions and are
# formatted here as percentages rounded to two decimals.
all_datasets_metrics <-
#Below includes NWT + R (including I) + R (excluding I)
  rbind(validation_summary_table, validation_summary_table_excludeI) %>%
  #Below includes only R (excluding I)
  #rbind(validation_summary_table_excludeI) %>%
  mutate(`Categorical_agreement_conf_int_lower_bound`=paste0(round(as.numeric(cat_lb)*100,2),"%")) %>%
  mutate(`Categorical_agreement_conf_int_upper_bound`=paste0(round(as.numeric(cat_ub)*100,2),"%")) %>%
  mutate(`ME_conf_int_lower_bound`=paste0(round(as.numeric(me_lb)*100,2),"%")) %>%
  mutate(`ME_conf_int_upper_bound`=paste0(round(as.numeric(me_ub)*100,2),"%")) %>%
  mutate(`VME_conf_int_lower_bound`=paste0(round(as.numeric(vme_lb)*100,2),"%")) %>%
  mutate(`VME_conf_int_upper_bound`=paste0(round(as.numeric(vme_ub)*100,2),"%")) %>%
  mutate(`Sensitivity_conf_int_lower_bound`=paste0(round(as.numeric(sens_lb)*100,2),"%")) %>%
  mutate(`Sensitivity_conf_int_upper_bound`=paste0(round(as.numeric(sens_ub)*100,2),"%")) %>%
  mutate(`Specificity_conf_int_lower_bound`=paste0(round(as.numeric(spec_lb)*100,2),"%")) %>%
  mutate(`Specificity_conf_int_upper_bound`=paste0(round(as.numeric(spec_ub)*100,2),"%"))

# Add the per-dataset S / I / R totals under their table headings.
all_datasets_metrics <- full_join(all_datasets_metrics, SIR_table) %>% select (-c(IR_total, N_total)) %>%
  rename(Susceptible=S_total)%>%
  rename(Intermediate=I_total)%>%
  rename(Resistant=R_total)

# Final column order of the table.
all_datasets_metrics <- all_datasets_metrics[, c("Dataset", "N","Susceptible", "Intermediate", "Resistant", "Prediction", "%R", "Categorical agreement", "Categorical_agreement_conf_int_lower_bound", "Categorical_agreement_conf_int_upper_bound", "ME", "ME_conf_int_lower_bound", "ME_conf_int_upper_bound", "VME", "VME_conf_int_lower_bound", "VME_conf_int_upper_bound", "Sensitivity", "Sensitivity_conf_int_lower_bound", "Sensitivity_conf_int_upper_bound", "Specificity", "Specificity_conf_int_lower_bound", "Specificity_conf_int_upper_bound")]


# Swap dataset names for their display codes. Order matters: the XXXXXXX /
# YYYYYYYY placeholders protect the starred names from the unstarred patterns.
all_datasets_metrics<- all_datasets_metrics %>% 
  mutate(Dataset = (gsub("EUSCAPE & EURECA", "A (EUCAST)", Dataset)))%>%
  mutate(Dataset = (gsub("India GHRU", "B (EUCAST)", Dataset)))%>% 
  mutate(Dataset = (gsub("AGAR GnSOP", "C (EUCAST)", Dataset)))%>% 
  mutate(Dataset = (gsub("Oxfordshire\\*", "XXXXXXX", Dataset)))%>% 
  mutate(Dataset = (gsub("Oxfordshire", "D (EUCAST)", Dataset)))%>% 
  mutate(Dataset = (gsub("XXXXXXX", "Oxfordshire (Kvv, EUCAST)", Dataset)))%>% 
  mutate(Dataset = (gsub("HURYC & CIBERINFEC", "E (EUCAST)", Dataset)))%>% 
  mutate(Dataset = (gsub("HULP", "F (EUCAST)", Dataset)))%>% 
  mutate(Dataset = (gsub("JARBS-GNR", "G (CLSI)", Dataset)))%>% 
  mutate(Dataset = (gsub("MBIRA\\*", "YYYYYYYY", Dataset)))%>% 
  mutate(Dataset = (gsub("MBIRA", "H (CLSI)", Dataset)))%>%
  mutate(Dataset = (gsub("YYYYYYYY", "MBIRA (Kvv, CLSI)", Dataset)))%>% 
  mutate(Dataset = (gsub("Controlling Superbugs study & Victorian CPE program", "I (CLSI)", Dataset)))%>% 
  mutate(Dataset = (gsub("Queen Elizabeth Central Hospital", "J* (EUCAST/BSAC)", Dataset)))%>% 
  mutate(Dataset = (gsub("Addis Ababa University\\*", "K* (Kvv, CLSI)", Dataset)))%>%
  mutate(Dataset = (gsub("University of Zurich", "L* (EUCAST)", Dataset)))%>%
  mutate(Dataset = (gsub("CHAMPS", "M* (CLSI)", Dataset)))

# Show undefined percentages ("NaN%") as "-". str_replace() is applied to
# every column directly; this replaces the deprecated funs() wrapper.
all_datasets_metrics <- all_datasets_metrics %>% 
  mutate_all(str_replace, "NaN%", "-")

write_tsv(all_datasets_metrics, "../tables/TableS10_extValidation_evalMetrics.tsv", na="-")

#Table S11 - genotype profiles vs. PPVs and median MICs

MIC prediction

#' Discrete quantile: an observed value picked by rank
#'
#' Returns the element at rank `floor(n * p)` of the sorted non-missing
#' values, where `n` is the number of non-missing values. No interpolation is
#' done, so the result is always one of the input values.
#'
#' @param x Vector of values; missing values are ignored.
#' @param p Quantile as a proportion (e.g. 0.25, 0.75).
#' @return The selected value, or the sentinel `0` when there are five or
#'   fewer non-missing values.
discrete_quantile <- function(x, p) {
  # sort() drops NA, so the rank below is computed from the values that are
  # actually being indexed (the unsorted length would include the NAs).
  ordered_values <- sort(x)
  n_values <- length(ordered_values)

  if (n_values > 5) {
    # Never index position 0, which would return a zero-length result.
    ordered_values[max(1, floor(n_values * p))]
  } else {
    0
  }
}

# Genotype and phenotype tables for the validation sets, joined on all
# column names they share (no `by` is given to full_join).
validation_geno <- read_delim("../tables/TableS4_genotypes_validation.tsv")
validation_pheno <- read_delim("../tables/TableS5_phenotype_metadata_validation.tsv")

validation <- full_join(validation_geno, validation_pheno)

# Summarise measurements per prediction group as "median [lower-upper]".
#
# data       table with columns Laboratory.Typing.Method, Resistance.phenotype,
#            Dataset, cipro_prediction_group and Measurement
# exclude_I  drop rows whose Resistance.phenotype is "I"
# pooled     FALSE: one row per Dataset x prediction group;
#            TRUE: one row per prediction group, with Dataset set to "Pooled"
#
# Returns the columns Dataset, Group and IQR. Rows measured by disk diffusion
# are always left out. Groups for which discrete_quantile() returns its
# sentinel 0 for both bounds are shown as "-".
summarise_mic_iqr <- function(data, exclude_I = FALSE, pooled = FALSE) {
  mic <- filter(data, Laboratory.Typing.Method != "Disk diffusion")
  if (exclude_I) {
    mic <- filter(mic, Resistance.phenotype != "I")
  }
  mic <- if (pooled) {
    group_by(mic, cipro_prediction_group)
  } else {
    group_by(mic, Dataset, cipro_prediction_group)
  }

  out <- mic %>%
    summarise(median = median(Measurement),
              lower25 = discrete_quantile(as.numeric(Measurement), p = 0.25),
              upper25 = discrete_quantile(as.numeric(Measurement), p = 0.75)) %>%
    mutate(IQR = paste0(median, " [", lower25, "-", upper25, "]"))

  # Explicit sentinel test instead of searching the formatted text for "0-0".
  out$IQR[out$lower25 %in% 0 & out$upper25 %in% 0] <- "-"

  out <- out %>%
    rename(Group = cipro_prediction_group) %>%
    select(-c(median, lower25, upper25))

  if (pooled) {
    # A single value is recycled to however many prediction groups exist,
    # rather than assuming a fixed number of rows.
    out <- mutate(out, Dataset = "Pooled")
  }
  out
}

#MIC median - with I excluded
MIC_pred_excludeI <- summarise_mic_iqr(validation, exclude_I = TRUE) %>%
  rename(IQR_excludeI = IQR)

MIC_pred_excludeI_pooled <- summarise_mic_iqr(validation, exclude_I = TRUE, pooled = TRUE) %>%
  rename(IQR_excludeI = IQR)

MIC_pred_excludeI <- rbind(MIC_pred_excludeI, MIC_pred_excludeI_pooled)


#MIC median - all included
MIC_pred <- summarise_mic_iqr(validation)

MIC_pred_SIR_pooled <- summarise_mic_iqr(validation, pooled = TRUE)

MIC_pred <- rbind(MIC_pred, MIC_pred_SIR_pooled)
#genotype profiles
# Stack the per-dataset genotype tables (plus the pooled one), tagging each
# row with the name of the table it came from, and express the positive
# predictive values as "numerator/denominator (percent%)":
#   RexcludeI_PPV = R / (S + R)
#   R_PPV         = R / (S + I + R)
#   NWT_PPV       = (R + I) / (S + I + R)
# Groups with a zero denominator are shown as "-".
pooled_dataset_genotype <-
  lst(MBIRA_Kpn_genotype,
      MBIRA_Kvv_genotype,
      HowdenGorrie_genotype,
      AGAR_genotype,
      Milan_genotype,
      HailuEthiopia_genotype,
      KayamaJapan_genotype,
      FeaseyMalawi_genotype,
      EUSCAPE_EURECA_genotype,
      EgliSwitzerland_genotype,
      HRYC_Spain_genotype,
      IndiaGHRU_genotype,
      NeumannGermany_genotype,
      Whitelaw_CHAMPS_genotype,
      Oxfordshire_Kpn_genotype,
      Oxfordshire_Kvv_genotype,
      IZSAM_Italy_genotype,
      LaPaz_Lazaro_genotype,
      HarrisMERINO_genotype,
      PachecoBrazil_genotype,
      pooled_genotype) %>%
  bind_rows(.id = "Dataset") %>%
  mutate(
    Dataset = gsub("_genotype", "", Dataset),
    N = S + I + R,
    RexcludeI_PPV = paste0(R, "/", (S + R), " (", round(R / (S + R) * 100, 2), "%)"),
    R_PPV = paste0(R, "/", (S + I + R), " (", round(R / (S + I + R) * 100, 2), "%)"),
    NWT_PPV = paste0((R + I), "/", (S + I + R), " (", round((R + I) / (S + I + R) * 100, 2), "%)")
  ) %>%
  select(-c(S, I, R, N)) %>%
  mutate(
    RexcludeI_PPV = replace(RexcludeI_PPV, RexcludeI_PPV == '0/0 (NaN%)', '-'),
    R_PPV = replace(R_PPV, R_PPV == '0/0 (NaN%)', '-'),
    NWT_PPV = replace(NWT_PPV, NWT_PPV == '0/0 (NaN%)', '-')
  )

# Translate the table-derived dataset codes into the display names used in
# the other tables; codes that are not listed are left as they are.
pooled_dataset_genotype$Dataset <- local({
  display_names <- c(
    "AGAR" = 'AGAR GnSOP',
    "EUSCAPE_EURECA" = 'EUSCAPE & EURECA',
    "EgliSwitzerland" = 'University of Zurich',
    "FeaseyMalawi" = 'Queen Elizabeth Central Hospital',
    "HRYC_Spain" = 'HURYC & CIBERINFEC',
    "HailuEthiopia" = 'Addis Ababa University*',
    "HowdenGorrie" = 'Controlling Superbugs study & Victorian CPE program',
    "IZSAM_Italy" = 'IZSAM',
    "IndiaGHRU" = 'India GHRU',
    "KayamaJapan" = 'JARBS-GNR',
    "LaPaz_Lazaro" = 'HULP',
    "MBIRA_Kpn" = 'MBIRA',
    "MBIRA_Kvv" = 'MBIRA*',
    "Milan" = 'IRCCS Ospedale San Raffaele',
    "NeumannGermany" = 'SCHARKI',
    "Oxfordshire_Kpn" = 'Oxfordshire',
    "Oxfordshire_Kvv" = 'Oxfordshire*',
    "Whitelaw_CHAMPS" = 'CHAMPS',
    "pooled" = 'Pooled'
  )
  dataset_code <- pooled_dataset_genotype$Dataset
  has_display_name <- dataset_code %in% names(display_names)
  dataset_code[has_display_name] <- unname(display_names[dataset_code[has_display_name]])
  dataset_code
})

# Table S11: genotype-profile PPVs next to the measurement summaries (all
# phenotypes, and with I excluded), matched on Dataset and Group.
# The dataset names are then swapped for their display codes; the
# substitutions run strictly in the listed order, with the XXXXXXX / YYYYYYYY
# placeholders protecting the starred names from the unstarred patterns.
geno_MIC <- pooled_dataset_genotype %>%
  merge(MIC_pred, by=c("Dataset","Group")) %>%
  merge(MIC_pred_excludeI, by=c("Dataset","Group")) %>%
  mutate(
    Dataset = Reduce(
      function(name, swap) gsub(swap[1], swap[2], name),
      list(
        c("EUSCAPE & EURECA", "A (EUCAST)"),
        c("India GHRU", "B (EUCAST)"),
        c("AGAR GnSOP", "C (EUCAST)"),
        c("Oxfordshire\\*", "XXXXXXX"),
        c("Oxfordshire", "D (EUCAST)"),
        c("XXXXXXX", "Oxfordshire (Kvv, EUCAST)"),
        c("HURYC & CIBERINFEC", "E (EUCAST)"),
        c("HULP", "F (EUCAST)"),
        c("JARBS-GNR", "G (CLSI)"),
        c("MBIRA\\*", "YYYYYYYY"),
        c("MBIRA", "H (CLSI)"),
        c("YYYYYYYY", "MBIRA (Kvv, CLSI)"),
        c("Controlling Superbugs study & Victorian CPE program", "I (CLSI)"),
        c("Queen Elizabeth Central Hospital", "J* (EUCAST/BSAC)"),
        c("Addis Ababa University\\*", "K* (Kvv, CLSI)"),
        c("University of Zurich", "L* (EUCAST)"),
        c("CHAMPS", "M* (CLSI)")
      ),
      Dataset
    )
  ) %>%
  rename(
    "MIC median [IQR] (exclude I)" = IQR_excludeI,
    "MIC median [IQR]" = IQR
  )

write_tsv(geno_MIC, "../tables/TableS11_extValidation_genotypeProfiles.tsv", na="-")

#showing R, NWT, R_excludeI

# Stack the table defined earlier in the file with the R-excluding-I table
# and reshape to one row per dataset x prediction x evaluation metric.
validation_summary_table_combined<-rbind(validation_summary_table, validation_summary_table_excludeI)

validation_summary_table_combined_long<-gather(validation_summary_table_combined, `Evaluation metric`, value, `Categorical agreement`:`%R`)

validation_summary_table_combined_long<- validation_summary_table_combined_long %>% mutate(value = as.numeric(gsub("%", "", value))) %>%
  mutate(`Evaluation metric` = (gsub("%R", "%R/NWT", `Evaluation metric`)))

validation_summary_table_combined_long$`Evaluation metric` <- factor(validation_summary_table_combined_long$`Evaluation metric`, levels=c("Categorical agreement",'Sensitivity','Specificity',"ME", 'VME', "%R/NWT"))

# Median of each metric per prediction type, added as an "Overall median" row.
validation_summary_table_median<-
validation_summary_table_combined_long %>% group_by(Prediction, `Evaluation metric`) %>% 
  summarise(value = median(as.numeric(value), na.rm = TRUE)) %>%
  mutate(Dataset="Overall median") %>%
  mutate(Country="") %>%
  mutate(N="")

validation_summary_table_total <-
rbind(validation_summary_table_combined_long, validation_summary_table_median)

# Attach the per-dataset totals and build "<Dataset> (S=, I=, R=)" labels.
validation_summary_table_total <- full_join(validation_summary_table_total, SIR_table) %>% 
  mutate(Dataset = (gsub("Pooled", "All datasets combined", Dataset)))%>%
  mutate(Dataset_label=paste(Dataset, " (S=", S_total, ", I=", I_total, ", R=",R_total, ")", sep = ""))

validation_summary_table_total$Dataset_label[validation_summary_table_total$Dataset_label == 'Overall median (S=NA, I=NA, R=NA)'] <- 'Overall median'

# Get totals to explicitly write in label
colSums(SIR_table[,-1])
# The pooled label is built from SIR_table itself, so it cannot drift from
# the data the way a hand-typed total does when datasets are added.
pooled_label <- paste0("All datasets combined (S=", sum(SIR_table$S_total),
                       ", I=", sum(SIR_table$I_total),
                       ", R=", sum(SIR_table$R_total), ")")
validation_summary_table_total$Dataset_label[validation_summary_table_total$Dataset_label == 'All datasets combined (S=NA, I=NA, R=NA)'] <- pooled_label

# Factor levels are looked up from the labels present in the data, in this
# dataset order, so every listed dataset keeps its label even if its counts
# change. PachecoBrazil and HarrisMERINO are listed too; they were missing
# from the previous hard-coded level list and therefore turned into NA.
dataset_order <- c("EUSCAPE & EURECA", "JARBS-GNR", "Queen Elizabeth Central Hospital", "India GHRU", "AGAR GnSOP", "Oxfordshire", "IZSAM", "MBIRA", "Controlling Superbugs study & Victorian CPE program", "IRCCS Ospedale San Raffaele", "HURYC & CIBERINFEC", "HULP", "MBIRA*", "University of Zurich", "Addis Ababa University*", "Oxfordshire*", "SCHARKI", "CHAMPS", "PachecoBrazil", "HarrisMERINO", "All datasets combined", "Overall median")
label_levels <- validation_summary_table_total$Dataset_label[match(dataset_order, validation_summary_table_total$Dataset)]
validation_summary_table_total$Dataset_label <- factor(validation_summary_table_total$Dataset_label, levels = unique(label_levels[!is.na(label_levels)]))

validation_summary_table_plot <-ggplot(validation_summary_table_total, aes(as.numeric(value),fct_inorder(as.factor(Dataset_label)))) +
  geom_point(aes(shape = `Prediction`, size=`Prediction`, colour=`Prediction`), alpha=0.5) +
  scale_shape_manual(values = c(R=15, NWT=18, R_excludeI=20)) +
  scale_size_manual(values = c(R=3, NWT=3, R_excludeI=3)) +
  scale_colour_manual(values=c(R="#bd1515", NWT="#2c15ae", R_excludeI="#339966")) +
  scale_y_discrete(limits=rev)+
  facet_grid(`Evaluation metric`~., scales = "free", space='free')+ 
  labs(y="", x = "%") + 
  theme_bw()+
  theme(legend.title=element_blank())

ggsave(filename="../figs/FigX_extValidation.pdf", plot=validation_summary_table_plot, width=10, height=14)

# Only include datasets with at least 10 S genomes and 10 R genomes, >25 genomes in total, <100% S/R genomes
# The size filter uses the numeric N_total from SIR_table: `N` holds text
# after the rbind with the median row (N = ""), so `N > 25` compared strings.
# Rows without totals (pooled row, overall median) pass via is.na().
validation_summary_table_total_exclude <- 
validation_summary_table_total %>%
  filter(S_total>10 | is.na(S_total)) %>%
  filter(R_total>10 | is.na(R_total)) %>%
  filter(N_total>25 | is.na(N_total)) 

validation_summary_table_plot_exclude <-ggplot(validation_summary_table_total_exclude, aes(as.numeric(value),fct_inorder(as.factor(Dataset_label)))) +
  geom_point(aes(shape = `Prediction`, size=`Prediction`, colour=`Prediction`), alpha=0.5) +
  scale_shape_manual(values = c(R=15, NWT=18, R_excludeI=20)) +
  scale_size_manual(values = c(R=3, NWT=3, R_excludeI=3)) +
  scale_colour_manual(values=c(R="#bd1515", NWT="#2c15ae", R_excludeI="#339966")) +
  scale_y_discrete(limits=rev)+
  facet_grid(`Evaluation metric`~., scales = "free", space='free')+ 
  labs(y="", x = "%") + 
  theme_bw()+
  theme(legend.title=element_blank())

ggsave(filename="../figs/FigX_extValidation_exclude.pdf", plot=validation_summary_table_plot_exclude, width=10, height=14)

Figure 7 - external validation

# Load the pre-rendered per-dataset panels (MIC or disk-diffusion plots)
# that are tiled into Figure 7 below.
HowdenGorrie_MIC <- readPNG("HowdenGorrie_ValidationOutput/MIC_PredRIS.png", native=TRUE)
MBIRA_MIC_Kpn <- readPNG("MBIRA_ValidationOutput/MIC_PredRIS_Kpn.png", native=TRUE)
MBIRA_MIC_Kvv <- readPNG("MBIRA_ValidationOutput/MIC_PredRIS_Kvv.png", native=TRUE)
AGAR_MIC <- readPNG("AGAR_ValidationOutput/MIC_PredRIS.png", native=TRUE)
Milan_MIC <- readPNG("MilanSanRafaelle_ValidationOutput/MIC_PredRIS.png", native=TRUE)
HailuEthiopia_DD <- readPNG("HailuEthiopia_ValidationOutput/DD_PredRIS.png", native=TRUE)
KayamaJapan_MIC <- readPNG("KayamaJapan_ValidationOutput/MIC_PredRIS.png", native=TRUE)
FeaseyMalawi_DD <- readPNG("FeaseyMalawi_ValidationOutput/DD_PredRIS.png", native=TRUE)
EUSCAPE_EURECA_MIC <- readPNG("EURECA_EUSCAPE_ValidationOutput/MIC_PredRIS.png", native=TRUE)


EgliSwitzerland_DD <- readPNG("EgliSwitzerland_2025_ValidationOutput/DD_PredRIS.png", native=TRUE)
HRYC_Spain_MIC <- readPNG("HRYC_Spain_ValidationOutput/MIC_PredRIS.png", native=TRUE)
IndiaGHRU_MIC <- readPNG("IndiaGHRU_2025_ValidationOutput/MIC_PredRIS.png", native=TRUE)
NeumannGermany_MIC <- readPNG("NeumannGermany_ValidationOutput/MIC_PredRIS.png", native=TRUE)

Whitelaw_CHAMPS_DD <- readPNG("Whitelaw_Stellenbosch_ValidationOutput/DD_PredRIS.png", native=TRUE)

Oxfordshire_Kpn_MIC <- readPNG("Oxfordshire_ValidationOutput/MIC_PredRIS_Kpn.png", native=TRUE)
Oxfordshire_Kvv_MIC <- readPNG("Oxfordshire_ValidationOutput/MIC_PredRIS_Kvv.png", native=TRUE)
IZSAM_Italy_MIC <- readPNG("IZSAM_Italy_ValidationOutput/MIC_PredRIS.png", native=TRUE)
# Read from the LaPaz_Lazaro output directory (the same directory the
# LaPaz_Lazaro summary table is loaded from); this previously pointed at the
# IZSAM_Italy image, so panel K duplicated panel F.
LaPaz_Lazaro_MIC <- readPNG("LaPaz_Lazaro_ValidationOutput/MIC_PredRIS.png", native=TRUE)

HarrisMERINO_MIC <- readPNG("HarrisMERINO_ValidationOutput/MIC_PredRIS.png", native=TRUE)

PachecoBrazil_MIC <- readPNG("PachecoBrazil_ValidationOutput/MIC_PredRIS.png", native=TRUE)

# Tile the panels three per row; each panel is followed by its title.
# NOTE(review): panels F and G both carry n=263 -- possibly a copy-paste;
# confirm both against the dataset counts.
patch <- (wrap_elements(EUSCAPE_EURECA_MIC) +
            ggtitle("A) EUSCAPE & EURECA MIC (n=2133)")  +
            wrap_elements(KayamaJapan_MIC) +
            ggtitle("B) JARBS-GNR Japan MIC (n=834)")+
            wrap_elements(IndiaGHRU_MIC) +
            ggtitle("C) India GHRU MIC (n=536)")+
            wrap_elements(AGAR_MIC) + 
            ggtitle("D) AGAR GnSOP MIC (n=485)") +
            wrap_elements(Oxfordshire_Kpn_MIC) + 
            ggtitle("E) Oxfordshire MIC (n=295)") + 
            wrap_elements(IZSAM_Italy_MIC) +
            ggtitle("F) IZSAM MIC (n=263)")+            
            wrap_elements(MBIRA_MIC_Kpn) + 
            ggtitle("G) MBIRA MIC (n=263)")+     
            wrap_elements(HowdenGorrie_MIC) +
            ggtitle("H) Controlling Superbugs study & \nVictorian CPE program MIC (n=212)") +
            wrap_elements(Milan_MIC) +
            ggtitle("I) IRCCS Ospedale San Raffaele MIC (n=198)")+
            wrap_elements(HRYC_Spain_MIC) +
            ggtitle("J) HURYC & CIBERINFEC MIC (n=173)")+
            wrap_elements(LaPaz_Lazaro_MIC) +
            ggtitle("K) HULP MIC (n=148)")+
            wrap_elements(MBIRA_MIC_Kvv) + 
            ggtitle("L) MBIRA* MIC (n=92)")+
            wrap_elements(Oxfordshire_Kvv_MIC) +
            ggtitle("M) Oxfordshire* MIC (n=66)")+
            wrap_elements(NeumannGermany_MIC) +
            ggtitle("N) SCHARKI MIC (n=56)")+  
            wrap_elements(PachecoBrazil_MIC) +
            ggtitle("O) Universidade Federal da \nBahia MIC (n=38)")+
            wrap_elements(HarrisMERINO_MIC) +
            ggtitle("P) MERINO Trial MIC (n=36)")+            
            
            wrap_elements(FeaseyMalawi_DD) +
            ggtitle("Q) Queen Elizabeth Central Hospital \n(Malawi) DD (n=737)") +
            wrap_elements(EgliSwitzerland_DD) +
            ggtitle("R) University of Zurich DD (n=72)")+
            wrap_elements(HailuEthiopia_DD) +
            ggtitle("S) Addis Ababa University* \n(Ethiopia) DD (n=70)")+
            wrap_elements(Whitelaw_CHAMPS_DD) +
            ggtitle("T) CHAMPS DD (n=40)")
          )+
  plot_layout(ncol = 3)

#ggsave(height=13, width=13, file="../figs/Fig7_extValidation.png")
#ggsave(height=13, width=13, file="../figs/Fig7_extValidation.pdf")

Figure S11 - external validation (supplementary - distribution vs. genotype profile and phenotype vs. genotype profile)

# Read a PNG with native = TRUE; every Figure S11 panel image below is loaded
# this way and later handed to wrap_elements().
read_native_png <- function(path) {
  readPNG(path, native = TRUE)
}

# For each validation dataset, load two pre-rendered images:
#   *_dist    - MIC_PredGroup*.png or DD_PredGroup*.png
#   *_profile - StackedBar_PredGroup_SIR*.png

# Howden / Gorrie
HowdenGorrie_dist <- read_native_png("HowdenGorrie_ValidationOutput/MIC_PredGroup.png")
HowdenGorrie_profile <- read_native_png("HowdenGorrie_ValidationOutput/StackedBar_PredGroup_SIR.png")

# MBIRA (separate Kpn and Kvv outputs)
MBIRA_Kpn_dist <- read_native_png("MBIRA_ValidationOutput/MIC_PredGroup_Kpn.png")
MBIRA_Kpn_profile <- read_native_png("MBIRA_ValidationOutput/StackedBar_PredGroup_SIR_Kpn.png")
MBIRA_Kvv_dist <- read_native_png("MBIRA_ValidationOutput/MIC_PredGroup_Kvv.png")
MBIRA_Kvv_profile <- read_native_png("MBIRA_ValidationOutput/StackedBar_PredGroup_SIR_Kvv.png")

# AGAR
AGAR_dist <- read_native_png("AGAR_ValidationOutput/MIC_PredGroup.png")
AGAR_profile <- read_native_png("AGAR_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Milan (San Raffaele)
Milan_dist <- read_native_png("MilanSanRafaelle_ValidationOutput/MIC_PredGroup.png")
Milan_profile <- read_native_png("MilanSanRafaelle_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Hailu (Ethiopia) - DD image
Hailu_dist <- read_native_png("HailuEthiopia_ValidationOutput/DD_PredGroup.png")
Hailu_profile <- read_native_png("HailuEthiopia_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Kayama (Japan)
Kayama_dist <- read_native_png("KayamaJapan_ValidationOutput/MIC_PredGroup.png")
Kayama_profile <- read_native_png("KayamaJapan_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Feasey (Malawi) - DD image
Feasey_dist <- read_native_png("FeaseyMalawi_ValidationOutput/DD_PredGroup.png")
Feasey_profile <- read_native_png("FeaseyMalawi_ValidationOutput/StackedBar_PredGroup_SIR.png")

# EURECA & EUSCAPE
EUSCAPE_EURECA_dist <- read_native_png("EURECA_EUSCAPE_ValidationOutput/MIC_PredGroup.png")
EUSCAPE_EURECA_profile <- read_native_png("EURECA_EUSCAPE_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Egli (Switzerland) - DD image
EgliSwitzerland_dist <- read_native_png("EgliSwitzerland_2025_ValidationOutput/DD_PredGroup.png")
EgliSwitzerland_profile <- read_native_png("EgliSwitzerland_2025_ValidationOutput/StackedBar_PredGroup_SIR.png")

# HRYC (Spain)
HRYC_Spain_dist <- read_native_png("HRYC_Spain_ValidationOutput/MIC_PredGroup.png")
HRYC_Spain_profile <- read_native_png("HRYC_Spain_ValidationOutput/StackedBar_PredGroup_SIR.png")

# India GHRU
IndiaGHRU_dist <- read_native_png("IndiaGHRU_2025_ValidationOutput/MIC_PredGroup.png")
IndiaGHRU_profile <- read_native_png("IndiaGHRU_2025_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Neumann (Germany)
NeumannGermany_dist <- read_native_png("NeumannGermany_ValidationOutput/MIC_PredGroup.png")
NeumannGermany_profile <- read_native_png("NeumannGermany_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Whitelaw / CHAMPS - DD image
WhitelawCHAMPS_dist <- read_native_png("Whitelaw_Stellenbosch_ValidationOutput/DD_PredGroup.png")
WhitelawCHAMPS_profile <- read_native_png("Whitelaw_Stellenbosch_ValidationOutput/StackedBar_PredGroup_SIR_DD.png")

# Oxfordshire (separate Kpn and Kvv outputs)
Oxfordshire_Kpn_dist <- read_native_png("Oxfordshire_ValidationOutput/MIC_PredGroup_Kpn.png")
Oxfordshire_Kpn_profile <- read_native_png("Oxfordshire_ValidationOutput/StackedBar_PredGroup_SIR_Kpn.png")
Oxfordshire_Kvv_dist <- read_native_png("Oxfordshire_ValidationOutput/MIC_PredGroup_Kvv.png")
Oxfordshire_Kvv_profile <- read_native_png("Oxfordshire_ValidationOutput/StackedBar_PredGroup_SIR_Kvv.png")

# IZSAM (Italy)
IZSAM_Italy_dist <- read_native_png("IZSAM_Italy_ValidationOutput/MIC_PredGroup.png")
IZSAM_Italy_profile <- read_native_png("IZSAM_Italy_ValidationOutput/StackedBar_PredGroup_SIR.png")

# La Paz / Lazaro
LaPaz_Lazaro_dist <- read_native_png("LaPaz_Lazaro_ValidationOutput/MIC_PredGroup.png")
LaPaz_Lazaro_profile <- read_native_png("LaPaz_Lazaro_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Harris / MERINO
Harris_MERINO_dist <- read_native_png("HarrisMERINO_ValidationOutput/MIC_PredGroup.png")
Harris_MERINO_profile <- read_native_png("HarrisMERINO_ValidationOutput/StackedBar_PredGroup_SIR.png")

# Pacheco (Brazil)
Pacheco_Brazil_dist <- read_native_png("PachecoBrazil_ValidationOutput/MIC_PredGroup.png")
Pacheco_Brazil_profile <- read_native_png("PachecoBrazil_ValidationOutput/StackedBar_PredGroup_SIR.png")

  # Figure S11, page 1: EURECA & EUSCAPE (panels i-ii) stacked over India GHRU
  # (panels iii-iv). Each row places the distribution image beside the
  # observed-phenotype image for one dataset.
  fig_s11_p1_top <- wrap_elements(EUSCAPE_EURECA_dist) +
    ggtitle("i) A (EUCAST) / \n EURECA & EUSCAPE (n=2133) - MIC distribution vs.\n genotype profile") +
    wrap_elements(EUSCAPE_EURECA_profile) +
    ggtitle("ii) A (EUCAST) / \n EURECA & EUSCAPE (n=2133) - observed phenotype vs.\n genotype profile")

  fig_s11_p1_bottom <- wrap_elements(IndiaGHRU_dist) +
    ggtitle("iii) B (EUCAST) / \nIndia GHRU (n=536) - MIC distribution vs.\n genotype profile") +
    wrap_elements(IndiaGHRU_profile) +
    ggtitle("iv) B (EUCAST) / \nIndia GHRU (n=536) - observed phenotype vs.\n genotype profile")

  # Left unassigned on purpose: ggsave() below is given no plot, so it relies
  # on its default of the last plot displayed.
  fig_s11_p1_top / fig_s11_p1_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_1.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_1.pdf", width = 11, height = 13)

  # Figure S11, page 2: AGAR GnSOP (panels v-vi) stacked over Oxfordshire Kpn
  # (panels vii-viii). Each row places the distribution image beside the
  # observed-phenotype image for one dataset.
  fig_s11_p2_top <- wrap_elements(AGAR_dist) +
    ggtitle("v) C (EUCAST) / \n AGAR GnSOP (n=485) - MIC distribution vs.\n genotype profile") +
    wrap_elements(AGAR_profile) +
    ggtitle("vi) C (EUCAST) / \n AGAR GnSOP (n=485) - observed phenotype vs.\n genotype profile")

  fig_s11_p2_bottom <- wrap_elements(Oxfordshire_Kpn_dist) +
    ggtitle("vii) D (EUCAST) / \n Oxfordshire (n=295) - MIC distribution vs.\n genotype profile") +
    wrap_elements(Oxfordshire_Kpn_profile) +
    ggtitle("viii) D (EUCAST) / \n Oxfordshire (n=295) - observed phenotype vs.\n genotype profile")

  # Left unassigned on purpose: ggsave() below is given no plot, so it relies
  # on its default of the last plot displayed.
  fig_s11_p2_top / fig_s11_p2_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_2.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_2.pdf", width = 11, height = 13)

  # Figure S11, page 3: HURYC & CIBERINFEC (panels ix-x) stacked over HULP
  # (panels xi-xii). Each row places the distribution image beside the
  # observed-phenotype image for one dataset.
  #
  # Titles x) and xii) use "/" after the testing standard, like every other
  # panel title. A backslash followed by a space is not a valid escape in an R
  # string literal and stops the code from parsing.
  fig_s11_p3_top <- wrap_elements(HRYC_Spain_dist) +
    ggtitle("ix) E (EUCAST) / \n HURYC & CIBERINFEC (n=173) - MIC distribution vs.\n genotype profile") +
    wrap_elements(HRYC_Spain_profile) +
    ggtitle("x) E (EUCAST) / \n HURYC & CIBERINFEC (n=173) \n - observed phenotype vs. genotype profile")

  fig_s11_p3_bottom <- wrap_elements(LaPaz_Lazaro_dist) +
    ggtitle("xi) F (EUCAST) / \n HULP (n=148) - MIC distribution vs.\n genotype profile") +
    wrap_elements(LaPaz_Lazaro_profile) +
    ggtitle("xii) F (EUCAST) / \n HULP (n=148) \n - observed phenotype vs. genotype profile")

  # Left unassigned on purpose: ggsave() below is given no plot, so it relies
  # on its default of the last plot displayed.
  fig_s11_p3_top / fig_s11_p3_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_3.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_3.pdf", width = 11, height = 13)

# Figure S11, page 4: JARBS-GNR Japan (panels xiii-xiv) stacked over MBIRA Kpn
# (panels xv-xvi). Each row places the distribution image beside the
# observed-phenotype image for one dataset.
fig_s11_p4_top <- wrap_elements(Kayama_dist) +
  ggtitle("xiii) G (CLSI) / \n JARBS-GNR Japan (n=834) - MIC distribution vs.\n genotype profile") +
  wrap_elements(Kayama_profile) +
  ggtitle("xiv)  G (CLSI) / \n JARBS-GNR Japan (n=834) - observed phenotype vs.\n genotype profile")

fig_s11_p4_bottom <- wrap_elements(MBIRA_Kpn_dist) +
  ggtitle("xv) H (CLSI) / \n MBIRA (n=263) - MIC distribution vs.\n genotype profile") +
  wrap_elements(MBIRA_Kpn_profile) +
  ggtitle("xvi) H (CLSI) / \n MBIRA (n=263) - observed phenotype vs.\n genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p4_top / fig_s11_p4_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_4.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_4.pdf", width = 11, height = 13)




# Figure S11, page 5: Controlling Superbugs / Victorian CPE program (panels
# xvii-xviii) stacked over Queen Elizabeth Central Hospital Malawi (panels
# xix-xx). Each row places the distribution image beside the
# observed-phenotype image for one dataset.
fig_s11_p5_top <- wrap_elements(HowdenGorrie_dist) +
  ggtitle("xvii)  I (CLSI) / \n Controlling Superbugs study & \nVictorian CPE program (n=212) - MIC distribution vs.\n genotype profile") +
  wrap_elements(HowdenGorrie_profile) +
  ggtitle("xviii)  I (CLSI) / \n Controlling Superbugs study & \nVictorian CPE program (n=212) - observed phenotype vs.\n genotype profile")

fig_s11_p5_bottom <- wrap_elements(Feasey_dist) +
  ggtitle("xix) J* (EUCAST/BSAC) / \n Queen Elizabeth Central Hospital Malawi (n=737) \n - DD distribution vs. genotype profile") +
  wrap_elements(Feasey_profile) +
  ggtitle("xx) J* (EUCAST/BSAC) / \n Queen Elizabeth Central Hospital Malawi (n=737) \n - observed phenotype vs. genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p5_top / fig_s11_p5_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_5.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_5.pdf", width = 11, height = 13)




  
# Figure S11, page 6: Addis Ababa University (panels xxi-xxii) stacked over
# University of Zurich (panels xxiii-xxiv). Each row places the DD
# distribution image beside the observed-phenotype image for one dataset.
#
# Title xxi) no longer carries the stray "." before its line break, so it
# matches the layout of title xxii).
fig_s11_p6_top <- wrap_elements(Hailu_dist) +
  ggtitle("xxi) K* (Kvv, CLSI) / \n Addis Ababa University* (n=70) \n - DD distribution vs. genotype profile") +
  wrap_elements(Hailu_profile) +
  ggtitle("xxii) K* (Kvv, CLSI) / \n Addis Ababa University* (n=70) \n - observed phenotype vs. genotype profile")

fig_s11_p6_bottom <- wrap_elements(EgliSwitzerland_dist) +
  ggtitle("xxiii) L* (EUCAST) / \n University of Zurich (n=72) - \n DD distribution vs. genotype profile") +
  wrap_elements(EgliSwitzerland_profile) +
  ggtitle("xxiv) L* (EUCAST) / \n University of Zurich (n=72) \n - observed phenotype vs. genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p6_top / fig_s11_p6_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_6.png", width = 10, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_6.pdf", width = 10, height = 13)

# Figure S11, page 7: CHAMPS (panels xxv-xxvi) stacked over IZSAM (panels
# xxvii-xxviii). Each row places the distribution image beside the
# observed-phenotype image for one dataset.
fig_s11_p7_top <- wrap_elements(WhitelawCHAMPS_dist) +
  ggtitle("xxv) M* (CLSI) / \n CHAMPS (n=40) - DD distribution vs.\n genotype profile") +
  wrap_elements(WhitelawCHAMPS_profile) +
  ggtitle("xxvi) M* (CLSI) / \n CHAMPS (n=40) - observed phenotype vs.\n genotype profile")

# NOTE(review): these titles give IZSAM n=283, while the main external
# validation figure earlier in this file titles the same dataset
# "F) IZSAM MIC (n=263)" - confirm which count is correct.
fig_s11_p7_bottom <- wrap_elements(IZSAM_Italy_dist) +
  ggtitle("xxvii) IZSAM (n=283) - MIC distribution vs.\n genotype profile") +
  wrap_elements(IZSAM_Italy_profile) +
  ggtitle("xxviii) IZSAM (n=283) - observed phenotype vs.\n genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p7_top / fig_s11_p7_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_7.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_7.pdf", width = 11, height = 13)


# Figure S11, page 8: IRCCS Ospedale San Raffaele (panels xxix-xxx) stacked
# over MBIRA Kvv (panels xxxi-xxxii). Each row places the distribution image
# beside the observed-phenotype image for one dataset.
fig_s11_p8_top <- wrap_elements(Milan_dist) +
  ggtitle("xxix) IRCCS Ospedale San Raffaele (n=198) -\n MIC distribution vs. genotype profile") +
  wrap_elements(Milan_profile) +
  ggtitle("xxx) IRCCS Ospedale San Raffaele (n=198) \n - observed phenotype vs. genotype profile")

fig_s11_p8_bottom <- wrap_elements(MBIRA_Kvv_dist) +
  ggtitle("xxxi) MBIRA (Kvv, CLSI) (n=92) - MIC distribution vs.\n genotype profile") +
  wrap_elements(MBIRA_Kvv_profile) +
  ggtitle("xxxii) MBIRA (Kvv, CLSI) (n=92) - observed phenotype vs.\n genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p8_top / fig_s11_p8_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_8.png", width = 10, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_8.pdf", width = 10, height = 13)


# Figure S11, page 9: Oxfordshire Kvv (panels xxxiii-xxxiv) stacked over
# SCHARKI (panels xxxv-xxxvi). Each row places the distribution image beside
# the observed-phenotype image for one dataset.
fig_s11_p9_top <- wrap_elements(Oxfordshire_Kvv_dist) +
  ggtitle("xxxiii) Oxfordshire (Kvv, EUCAST) (n=66) \n - MIC distribution vs. genotype profile") +
  wrap_elements(Oxfordshire_Kvv_profile) +
  ggtitle("xxxiv) Oxfordshire (Kvv, EUCAST) (n=66) \n - observed phenotype vs. genotype profile")

fig_s11_p9_bottom <- wrap_elements(NeumannGermany_dist) +
  ggtitle("xxxv) SCHARKI (n=56) - MIC distribution vs.\n genotype profile") +
  wrap_elements(NeumannGermany_profile) +
  ggtitle("xxxvi) SCHARKI (n=56) - observed phenotype vs.\n genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p9_top / fig_s11_p9_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_9.png", width = 10, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_9.pdf", width = 10, height = 13)


# Figure S11, page 10: Universidade Federal da Bahia (panels xxxvii-xxxviii)
# stacked over the MERINO trial (panels xxxix-xl).
#
# The rows use the Pacheco_Brazil_* and Harris_MERINO_* images, which are the
# ones read from the PachecoBrazil and HarrisMERINO output folders. The
# Feasey (Malawi) and Egli (Switzerland) images were previously drawn under
# these titles, although they already appear on their own pages. The MERINO
# distribution image is MIC_PredGroup.png, so panel xxxix) is titled as an
# MIC distribution rather than a DD distribution.
fig_s11_p10_top <- wrap_elements(Pacheco_Brazil_dist) +
  ggtitle("xxxvii) Universidade Federal da Bahia (n=38) \n - MIC distribution vs. genotype profile") +
  wrap_elements(Pacheco_Brazil_profile) +
  ggtitle("xxxviii) Universidade Federal da Bahia (n=38) \n - observed phenotype vs. genotype profile")

fig_s11_p10_bottom <- wrap_elements(Harris_MERINO_dist) +
  ggtitle("xxxix) MERINO trial (n=36) \n - MIC distribution vs. genotype profile") +
  wrap_elements(Harris_MERINO_profile) +
  ggtitle("xl) MERINO trial (n=36) \n - observed phenotype vs. genotype profile")

# Left unassigned on purpose: ggsave() below is given no plot, so it relies on
# its default of the last plot displayed.
fig_s11_p10_top / fig_s11_p10_bottom

ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_10.png", width = 11, height = 13)
ggsave(filename = "../figs/FigS11_extValidation_genoprofiles_10.pdf", width = 11, height = 13)

numbers for summary

# Validation-set inputs: phenotype metadata (Table S5) and genotypes (Table S4).
validation_phenotypes <- read_delim(
  "../tables/TableS5_phenotype_metadata_validation.tsv"
)
validation_genotypes <- read_delim(
  "../tables/TableS4_genotypes_validation.tsv"
)

# Number of phenotype rows for each combination of dataset, alias, assembly
# method, typing method, testing standard and typing platform.
validation_phenotypes_summary_table <- validation_phenotypes %>%
  group_by(
    Dataset,
    Dataset_alias,
    `Assembly method`,
    Laboratory.Typing.Method,
    Testing.standard,
    Laboratory.Typing.Platform
  ) %>%
  count()

# The counts above are the foundation for Table S3 (export left disabled):
#write_tsv(validation_phenotypes_summary_table, "../tables/TableS3_validation_summary.tsv", na="-")

# Species counts in the genotype table. The EUSCAPE and San Raffaele data
# still have to be added to these numbers:
#   EUSCAPE = 2133 Kpn
#   IRCCS Ospedale San Raffaele = 198 Kpn
count(validation_genotypes, species)

# Typing-method counts in the phenotype table, with the same two additions:
#   EUSCAPE = 2133 MIC
#   IRCCS Ospedale San Raffaele = 198 MIC
count(validation_phenotypes, Laboratory.Typing.Method)